In [1]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

Autos

Source of data: https://datahub.io/machine-learning/autos

# Read the Autos dataset from the local CSV file and preview its first rows.
df2 = pd.read_csv(filepath_or_buffer='c:/1/autos.csv')
df2.head(n=5)

# Bubble plot of the Autos data with the pyplot interface:
# x = 'horsepower', y = 'engine_size', marker area = 'engine_size', marker colour = 'price'.
fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Keep the returned collection so the colorbar is tied to this scatter explicitly
# instead of relying on pyplot's "current mappable".
autos_points = plt.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price',
                           cmap='PuBu', edgecolors='grey', linewidths=0.8)

# The title names the columns that are actually plotted: size comes from
# 'engine_size' (the old title said 'city_mpg'), and the subtitle sits on its own line.
plt.title("Bubble Plot of Autos: horsepower vs engine_size\n(color: 'price' & size: 'engine_size')", fontsize=16)
plt.xlabel('horsepower', fontsize=18)
plt.ylabel('engine_size', fontsize=18)
plt.colorbar(autos_points)

plt.show()

C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))

Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64

# Bubble plot of the Autos data with the object-oriented Axes interface:
# x = 'horsepower', y = 'engine_size', marker area = 'engine_size', marker colour = 'price'.
# Adds a colorbar, a legend for the bubble sizes and a text label on every bubble.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the bubbles exactly once and keep the returned collection: it is the handle
# needed by both the colorbar and the size legend (no second scatter call required).
AA = ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price',
                cmap='PuBu', edgecolors='grey', linewidths=0.8)
ax.set_title("Bubble Plot of Autos: horsepower vs engine_size\n(color: 'price' & size: 'engine_size')", fontsize=16)
ax.set_xlabel('horsepower', fontsize=18)
ax.set_ylabel('engine_size', fontsize=18)

# Colorbar for the 'price' colour scale.
fig.colorbar(AA, ax=ax)

# Legend explaining the bubble sizes.
handles, labels = AA.legend_elements(prop="sizes", alpha=0.6)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

# Label each bubble with the car make. Iterating the columns together is purely
# positional, so it does not depend on the frame having a 0..n-1 index; rows without
# a finite position are skipped because they have no bubble to label.
labelled = df2.dropna(subset=['horsepower', 'engine_size'])
for txt, x_pos, y_pos in zip(labelled['make'], labelled['horsepower'], labelled['engine_size']):
    ax.annotate(txt, (x_pos, y_pos))

plt.show()

C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))

Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64

Midwest

# Read the filtered Midwest dataset from the local CSV file and preview its first rows.
df = pd.read_csv(filepath_or_buffer='c:/2/midwest_filter.csv')
df.head(n=5)

# Bubble plot of the Midwest data with the pyplot interface:
# x = 'area', y = 'poptotal', marker area = 'dot_size', marker colour = 'popdensity'.
fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Keep the returned collection so the colorbar is tied to this scatter explicitly.
midwest_points = plt.scatter('area', 'poptotal', data=df, s='dot_size', c='popdensity',
                             cmap='Reds', edgecolors='blue', linewidths=0.8)

# The explanatory subtitle goes on its own line (the line break had been lost,
# which rendered the title as "Arean(color: ...").
plt.title("Bubble Plot of PopTotal vs Area\n(color: 'popdensity' & size: 'dot_size' - both are numeric columns in midwest)", fontsize=16)
plt.xlabel('Area', fontsize=18)
plt.ylabel('Poptotal', fontsize=18)
plt.colorbar(midwest_points)
plt.show()

C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))

Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64

# Bubble plot of the Midwest data with the object-oriented Axes interface:
# x = 'area', y = 'poptotal', marker area = 'dot_size', marker colour = 'popdensity'.
# Adds a colorbar, a legend for the bubble sizes and a county label on every bubble.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the bubbles exactly once and keep the returned collection: it is the handle
# needed by both the colorbar and the size legend (no second scatter call required).
BB = ax.scatter('area', 'poptotal', data=df, s='dot_size', c='popdensity',
                cmap='YlGn', edgecolors='blue', linewidths=0.8)
ax.set_title("Bubble Plot of PopTotal vs Area\ncolor: 'popdensity' & size: 'dot_size'", fontsize=16)
ax.set_xlabel('Area', fontsize=18)
ax.set_ylabel('Poptotal', fontsize=18)

# Colorbar for the 'popdensity' colour scale.
fig.colorbar(BB, ax=ax)

# Legend explaining the bubble sizes.
handles, labels = BB.legend_elements(prop="sizes", alpha=0.6)
legend = ax.legend(handles, labels, loc="lower right", title="Sizes")

# Label each bubble with the county name. Iterating the columns together is purely
# positional, so it does not depend on the frame having a 0..n-1 index; rows without
# a finite position are skipped because they have no bubble to label.
labelled = df.dropna(subset=['area', 'poptotal'])
for txt, x_pos, y_pos in zip(labelled['county'], labelled['area'], labelled['poptotal']):
    ax.annotate(txt, (x_pos, y_pos))

plt.show()

C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))

Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64

WorldHappinessReport

Source of data: https://worldhappiness.report/download/

The best plots appear when we combine various data!

# Load the World Happiness Report and keep only the rows for the year 2017.
df3 = pd.read_csv('c:/1/WorldHappinessReport.csv')
# .copy() makes the filtered frame independent of the full table, so the
# 'Population2017' column assigned to df3 further down is written to a real frame
# and does not raise pandas' SettingWithCopyWarning.
df3 = df3[df3['Year'] == 2017].copy()
df3.tail(2)

# Read the world population table from the local CSV file and preview two rows.
df4 = pd.read_csv(filepath_or_buffer='c:/1/WorldPopulation.csv')
df4.head(n=2)

We keep only the African regions and only the year 2017.

# Lookup table: 'Country Name' -> value of the '2017' column of df4.
D3 = dict(zip(df4['Country Name'], df4['2017']))

# Attach the 2017 figure from D3 to every country and rescale it by 1/100000 in a
# single step. Countries whose name is not a key of D3 get NaN.
# NOTE(review): presumably the values are head counts, so the result is in units of
# 100,000 people - confirm against the population file.
df3['Population2017'] = df3['Country'].map(D3) / 100000

# Missing values per column before cleaning (only the last expression of a cell is displayed).
df3.isna().sum()
# Discard every row that has at least one missing value.
df3 = df3.dropna(axis='index', how='any')
# Missing values per column after cleaning.
df3.isna().sum()

Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64

# Regions that make up the African subset of the report.
kot = ['Sub-Saharan Africa', 'Middle East and Northern Africa']
# Select the rows whose 'Region' is one of the regions above and preview two of them.
AFR = df3.loc[df3['Region'].isin(kot)]
AFR.head(n=2)

# Save the African subset and load it back as df10. The reloaded frame gets a fresh
# 0..n-1 index, unlike AFR, which keeps the row labels of the full report.
# index=False keeps the old row labels out of the file; otherwise they are written as
# an extra unnamed column that shows up as a spurious "Unnamed: 0"-style column on reload.
AFR.to_csv('c:/8/AfricaHappinessReport2017.csv', index=False)
df10 = pd.read_csv('c:/8/AfricaHappinessReport2017.csv')
df10.head(2)

# Bubble plot of the African countries in 2017 with the object-oriented Axes interface:
# x = 'Happiness Score', y = 'Freedom', marker area = 'Population2017', marker colour = 'Freedom'.
# Adds a colorbar, a legend for the bubble sizes and a country label on every bubble.
fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Draw the bubbles exactly once and keep the returned collection: it is the handle
# needed by both the colorbar and the size legend (no second scatter call required).
CC = ax.scatter('Happiness Score', 'Freedom', data=df10, s='Population2017', c='Freedom',
                cmap='RdYlGn', edgecolors='grey', linewidths=0.8)

# The title states the column that really drives the colour ('Freedom'); it used to
# claim 'Economy (GDP per Capita)'.
# NOTE(review): if colouring by GDP was the intent, change c= above and this title together.
ax.set_title("AFRICA 2017 Happiness & Freedom\n(color: 'Freedom' & size: 'Population2017')", fontsize=16)
ax.set_xlabel('Happiness Score', fontsize=18)
ax.set_ylabel('Freedom', fontsize=18)

# Colorbar for the 'Freedom' colour scale.
fig.colorbar(CC, ax=ax)

# Legend explaining the bubble sizes. Author's note: this works poorly for continuous
# size data; it is meant for a handful of size classes.
handles, labels = CC.legend_elements(prop="sizes", alpha=0.1)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

# Label each bubble with the country name. Iterating the columns together is purely
# positional, so it does not depend on the frame having a 0..n-1 index; rows without
# a finite position are skipped because they have no bubble to label.
labelled = df10.dropna(subset=['Happiness Score', 'Freedom'])
for txt, x_pos, y_pos in zip(labelled['Country'], labelled['Happiness Score'], labelled['Freedom']):
    ax.annotate(txt, (x_pos, y_pos))

plt.show()

Diabetes

# Read the diabetes dataset from the local CSV file and preview two rows.
# Note: this rebinds df2, which held the Autos data earlier in the notebook.
df2 = pd.read_csv(filepath_or_buffer='c:/1/diabetes.csv')
df2.head(n=2)

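Add a bubble-size column derived from BMI: BMI is binned into five quantile classes, and each class is scaled to a marker size (70 to 350) so the differences are visible on the plot.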

# Bin BMI into five quantile classes coded 0..4, then turn the class number into a
# marker size: class k becomes (k + 1) * 70, i.e. 70, 140, 210, 280 or 350.
bmi_quintile = pd.qcut(df2['BMI'], q=5, labels=False).astype(int)
df2['BMI_class'] = (bmi_quintile + 1) * 70

# Bubble plot of the diabetes data with the pyplot interface:
# x = 'Age', y = 'Glucose', marker area = 'BMI_class', marker colour = 'BloodPressure'.
fig = plt.figure(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')

# Keep the returned collection so the colorbar is tied to this scatter explicitly.
diabetes_points = plt.scatter('Age', 'Glucose', data=df2, s='BMI_class', c='BloodPressure',
                              cmap='YlOrBr', edgecolors='blue', linewidths=0.8)

# The subtitle goes on its own line (the line break had been lost, which rendered
# the title as "Diabetesn color: ...").
plt.title("Bubble Plot of Diabetes\ncolor: BloodPressure & size: BMI", fontsize=16)
plt.xlabel('Age', fontsize=18)
plt.ylabel('Glucose', fontsize=18)
plt.colorbar(diabetes_points)
plt.show()

	Unnamed: 0	symboling	normalized_losses	make	fuel_type	aspiration	num_doors	body_style	drive_wheels	engine_location	…	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
0	0	3	NaN	alfa-romero	gas	std	two	convertible	rwd	front	…	130	mpfi	3.47	2.68	9.0	111.0	5000.0	21	27	13495.0
1	1	3	NaN	alfa-romero	gas	std	two	convertible	rwd	front	…	130	mpfi	3.47	2.68	9.0	111.0	5000.0	21	27	16500.0
2	2	1	NaN	alfa-romero	gas	std	two	hatchback	rwd	front	…	152	mpfi	2.68	3.47	9.0	154.0	5000.0	19	26	16500.0
3	3	2	164.0	audi	gas	std	four	sedan	fwd	front	…	109	mpfi	3.19	3.40	10.0	102.0	5500.0	24	30	13950.0
4	4	2	164.0	audi	gas	std	four	sedan	4wd	front	…	136	mpfi	3.19	3.40	8.0	115.0	5500.0	18	22	17450.0

	PID	county	state	area	poptotal	popdensity	popwhite	popblack	popamerindian	popasian	…	percprof	poppovertyknown	percpovertyknown	percbelowpoverty	percchildbelowpovert	percadultpoverty	percelderlypoverty	inmetro	category	dot_size
0	561	ADAMS	IL	0.052	66090	1270.961540	63917	1702	98	249	…	4.355859	63628.0	96.274777	13.151443	18.011717	11.009776	12.443812	0.0	AAR	250.944411
1	562	ALEXANDER	IL	0.014	10626	759.000000	7054	3496	19	48	…	2.870315	10529.0	99.087145	32.244278	45.826514	27.385647	25.228976	0.0	LHR	185.781260
2	563	BOND	IL	0.022	14991	681.409091	14477	429	35	16	…	4.488572	14235.0	94.956974	12.068844	14.036061	10.852090	12.697410	0.0	AAR	175.905385
3	564	BOONE	IL	0.017	30806	1812.117650	29344	127	46	150	…	4.197800	30337.0	98.477569	7.209019	11.179536	5.536013	6.217047	1.0	ALU	319.823487
4	565	BROWN	IL	0.018	5836	324.222222	5264	547	14	5	…	3.367680	4815.0	82.505140	13.520249	13.022889	11.143211	19.200000	0.0	AAR	130.442161

	Unnamed: 0	Country	Region	Happiness Rank	Happiness Score	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	Year
493	493	Zambia	Sub-Saharan Africa	116.0	4.514	0.636407	1.003187	0.257836	0.461603	0.078214	0.249580	1.826705	2017.0
494	494	Zimbabwe	Sub-Saharan Africa	138.0	3.875	0.375847	1.083096	0.196764	0.336384	0.095375	0.189143	1.597970	2017.0

	Unnamed: 0	Country Name	Country Code	1961	1962	1963	1964	1965	1966	1967	…	2009	2010	2011	2012	2013	2014	2015	2016	2017	2018
0	0	Aruba	ABW	54211.0	55438.0	56225.0	56695.0	57032.0	57360.0	57715.0	…	101353.0	101453.0	101669.0	102053.0	102577.0	103187.0	103795.0	104341.0	104822.0	105264.0
1	1	Afghanistan	AFG	8996351.0	9166764.0	9345868.0	9533954.0	9731361.0	9938414.0	10152331.0	…	27294031.0	28004331.0	28803167.0	29708599.0	30696958.0	31731688.0	32758020.0	33736494.0	34656032.0	35530081.0

	Unnamed: 0	Country	Region	Happiness Rank	Happiness Score	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	Year	Population2017
332	332	Algeria	Middle East and Northern Africa	53.0	5.872	1.091864	1.146217	0.617585	0.233336	0.146096	0.069437	2.567604	2017.0	406.06052
333	333	Angola	Sub-Saharan Africa	140.0	3.795	0.858428	1.104412	0.049869	0.000000	0.069720	0.097926	1.614482	2017.0	288.13463

THE DATA SCIENCE LIBRARY

Wojciech Moszczyński

Perfect Plots: Bubble Plot

Autos

Midwest

WorldHappinessReport

Diabetes

	Pregnancies	Glucose	BloodPressure	SkinThickness	Insulin	BMI	DiabetesPedigreeFunction	Age	Outcome
0	6	148	72	35	0	33.6	0.627	50	1
1	1	85	66	29	0	26.6	0.351	31	0