# Source of data: https://datahub.io/machine-learning/autos
# Source of data: https://worldhappiness.report/download/
# The best plots appear when we combine various data! Only Africa and only 2017.
# Adds BMI indicator amplifier.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Autos
# Autos bubble plot (pyplot interface): horsepower vs. engine size,
# marker area taken from 'engine_size' and marker colour from 'price'.
df2 = pd.read_csv('c:/1/autos.csv')

fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
plt.scatter(
    'horsepower',
    'engine_size',
    data=df2,
    s='engine_size',
    c='price',
    cmap='PuBu',
    edgecolors='grey',
    linewidths=0.8,
)
# The subtitle names the columns actually passed to `c` and `s` above.
plt.title(
    "Bubble Plot of Autos Area\n(color: 'price' & size: 'engine_size')",
    fontsize=16,
)
plt.xlabel('horsepower', fontsize=18)
plt.ylabel('engine_size', fontsize=18)
# With no argument, pyplot attaches the colorbar to the most recent scatter.
plt.colorbar()
plt.show()
# Autos bubble plot (object-oriented interface) with a colorbar, a legend for
# the marker sizes and a text label on every bubble.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the scatter once and keep the returned collection: both the colorbar
# and the size legend are built from this handle.
AA = ax.scatter(
    'horsepower',
    'engine_size',
    data=df2,
    s='engine_size',
    c='price',
    cmap='PuBu',
    edgecolors='grey',
    linewidths=0.8,
)
ax.set_title(
    "Bubble Plot of Autos Area\n(color: 'price' & size: 'engine_size')",
    fontsize=16,
)
ax.set_xlabel('horsepower', fontsize=18)
ax.set_ylabel('engine_size', fontsize=18)

# Colorbar for the 'price' colour scale.
fig.colorbar(AA, ax=ax)

# Legend explaining the marker sizes.
handles, labels = AA.legend_elements(prop="sizes", alpha=0.6)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

# Label every bubble with the car make. Iterating the columns in lockstep
# avoids mixing a positional counter with label-based Series lookups.
for txt, x, y in zip(df2['make'], df2['horsepower'], df2['engine_size']):
    ax.annotate(txt, (x, y))

plt.show()
# Midwest bubble plot (pyplot interface): area vs. total population,
# marker area taken from 'dot_size' and marker colour from 'popdensity'.
df = pd.read_csv('c:/2/midwest_filter.csv')

fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
plt.scatter(
    'area',
    'poptotal',
    data=df,
    s='dot_size',
    c='popdensity',
    cmap='Reds',
    edgecolors='blue',
    linewidths=0.8,
)
# Two-line title: headline, then the colour/size encoding.
plt.title(
    "Bubble Plot of PopTotal vs Area\n"
    "(color: 'popdensity' & size: 'dot_size' - both are numeric columns in midwest)",
    fontsize=16,
)
plt.xlabel('Area', fontsize=18)
plt.ylabel('Poptotal', fontsize=18)
# With no argument, pyplot attaches the colorbar to the most recent scatter.
plt.colorbar()
plt.show()
# Midwest bubble plot (object-oriented interface) with a colorbar, a legend
# for the marker sizes and a county label on every bubble.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the scatter once and keep the returned collection: both the colorbar
# and the size legend are built from this handle.
BB = ax.scatter(
    'area',
    'poptotal',
    data=df,
    s='dot_size',
    c='popdensity',
    cmap='YlGn',
    edgecolors='blue',
    linewidths=0.8,
)
ax.set_title(
    "Bubble Plot of PopTotal vs Area\n color: 'popdensity' & size: 'dot_size'",
    fontsize=16,
)
ax.set_xlabel('Area', fontsize=18)
ax.set_ylabel('Poptotal', fontsize=18)

# Colorbar for the 'popdensity' colour scale.
fig.colorbar(BB, ax=ax)

# Legend explaining the marker sizes.
handles, labels = BB.legend_elements(prop="sizes", alpha=0.6)
legend = ax.legend(handles, labels, loc="lower right", title="Sizes")

# Label every bubble with its county. Iterating the columns in lockstep
# avoids mixing a positional counter with label-based Series lookups.
for txt, x, y in zip(df['county'], df['area'], df['poptotal']):
    ax.annotate(txt, (x, y))

plt.show()
# WorldHappinessReport
# Build the Africa-only 2017 happiness table, enriched with each country's
# 2017 population, and save/reload it as a standalone CSV.

# Keep only the 2017 rows. The explicit .copy() makes df3 an independent
# frame, so adding a column below does not operate on a slice of the original
# (which triggers pandas' SettingWithCopyWarning).
df3 = pd.read_csv('c:/1/WorldHappinessReport.csv')
df3 = df3[df3['Year'] == 2017].copy()

# Country name -> value of the '2017' column of the population table.
df4 = pd.read_csv('c:/1/WorldPopulation.csv')
D3 = df4.set_index('Country Name')['2017'].to_dict()

# Attach the population to each country and divide by 100,000; the scaled
# value is what the plot uses as marker size. Countries missing from the
# lookup get NaN here.
df3['Population2017'] = df3['Country'].map(D3) / 100000

# Drop every row that has at least one missing value.
df3 = df3.dropna(how='any')

# Regions that make up the African subset.
kot = ['Sub-Saharan Africa', 'Middle East and Northern Africa']
AFR = df3[df3['Region'].isin(kot)]

# Save the subset and read it back. The reloaded frame has a fresh 0..n-1
# index; the old index is stored in the file as an extra unnamed column.
AFR.to_csv('c:/8/AfricaHappinessReport2017.csv')
df10 = pd.read_csv('c:/8/AfricaHappinessReport2017.csv')
# Africa 2017 bubble plot: happiness score vs. freedom, marker area taken
# from 'Population2017' and marker colour from 'Freedom'.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the scatter once and keep the returned collection: both the colorbar
# and the size legend are built from this handle.
CC = ax.scatter(
    'Happiness Score',
    'Freedom',
    data=df10,
    s='Population2017',
    c='Freedom',
    cmap='RdYlGn',
    edgecolors='grey',
    linewidths=0.8,
)
# The subtitle names the columns actually passed to `c` and `s` above.
ax.set_title(
    "AFRICA 2017 Happiness & Freedom\n(color: 'Freedom' & size: 'Population2017')",
    fontsize=16,
)
ax.set_xlabel('Happiness Score', fontsize=18)
ax.set_ylabel('Freedom', fontsize=18)

# Colorbar for the 'Freedom' colour scale.
fig.colorbar(CC, ax=ax)

# Legend for the marker sizes.
# NOTE(review): the original author remarked that this size legend does not
# work well for continuous data (it needs only a few size classes) - confirm
# that the result is readable for the population values.
handles, labels = CC.legend_elements(prop="sizes", alpha=0.1)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

# Label every bubble with its country. Iterating the columns in lockstep
# avoids mixing a positional counter with label-based Series lookups.
for txt, x, y in zip(df10['Country'], df10['Happiness Score'], df10['Freedom']):
    ax.annotate(txt, (x, y))

plt.show()
# Diabetes bubble plot: age vs. glucose, marker colour from 'BloodPressure'
# and marker area from a five-level BMI class.
df2 = pd.read_csv('c:/1/diabetes.csv')

# Bin BMI into five equal-frequency classes (0..4), shift to 1..5 and
# multiply by 70 so the classes become marker sizes 70, 140, ..., 350.
bmi_quintile = pd.qcut(df2['BMI'], 5, labels=False).astype(int)
df2['BMI_class'] = (bmi_quintile + 1) * 70

fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
plt.scatter(
    'Age',
    'Glucose',
    data=df2,
    s='BMI_class',
    c='BloodPressure',
    cmap='YlOrBr',
    edgecolors='blue',
    linewidths=0.8,
)
# Two-line title: headline, then the colour/size encoding.
plt.title("Bubble Plot of Diabetes\n color: BloodPressure & size: BMI", fontsize=16)
plt.xlabel('Age', fontsize=18)
plt.ylabel('Glucose', fontsize=18)
# With no argument, pyplot attaches the colorbar to the most recent scatter.
plt.colorbar()
plt.show()