Perfect Plots Bubble Plot

 

Perfect Plots: Bubble Plot

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Read the automobile data set from disk and preview its first three rows.
# NOTE(review): absolute local path; the notebook only runs where this file exists.
df2 = pd.read_csv(filepath_or_buffer='/home/wojciech/Pulpit/1/autos.csv')
df2.head(n=3)
Out[2]:
  Unnamed: 0 symboling normalized_losses make fuel_type aspiration num_doors body_style drive_wheels engine_location engine_size fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg highway_mpg price
0 0 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 13495.0
1 1 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 16500.0
2 2 1 NaN alfa-romero gas std two hatchback rwd front 152 mpfi 2.68 3.47 9.0 154.0 5000.0 19 26 16500.0

3 rows × 27 columns

 

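I create a synthetic variable, `city_mpg2` (city miles per gallon multiplied by 30), that is used to scale up the bubble size. Note that miles per gallon measures fuel economy, so a larger bubble means lower fuel consumption.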

In [3]:
# Multiply city_mpg by 30 to get a column suitable for use as scatter marker
# sizes; the factor 30 is presumably chosen by eye (not explained in the notebook).
df2['city_mpg2'] = df2['city_mpg'].mul(30)
In [4]:
class Bubble_Plot_1:
    """Bubble chart of two DataFrame columns, sized and coloured by two more.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains every column named by the other arguments.
    X, Y : str
        Column names plotted on the x and y axes; also used as axis labels.
    size : str
        Column name whose values are passed to ``scatter`` as marker sizes (``s``).
    kolor : str
        Column name whose values are mapped to colours with the ``'PuBu'`` colormap.
    title : str
        Text shown as the figure title.
    """

    def __init__(self,df,X,Y,size, kolor, title):
        self.df = df
        self.X = X
        self.Y = Y
        self.size = size
        self.kolor = kolor
        self.title = title

    def buble(self):
        """Draw the bubble chart with a colorbar and display it.

        All inputs are taken from the instance attributes set in ``__init__``,
        so the method does not depend on any notebook-level variables.
        """
        fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
        # Column names are resolved against ``data``; other colormaps that were
        # tried here: 'YlGn', 'PuBu', 'YlOrBr', 'RdYlGn'.
        bubbles = ax.scatter(
            self.X, self.Y, data=self.df, s=self.size, c=self.kolor,
            cmap='PuBu', edgecolors='grey', linewidths=0.8,
        )
        ax.set_title(self.title, fontsize=16)
        ax.set_xlabel(self.X, fontsize=18)
        ax.set_ylabel(self.Y, fontsize=18)
        fig.colorbar(bubbles, ax=ax)

        plt.show()

    
    
import matplotlib.pyplot as plt
In [5]:
# Column selection for the first autos bubble chart.
df = df2
X, Y = 'horsepower', 'engine_size'
size, kolor = 'city_mpg2', 'price'
title = 'Car comparison'  # the title is typed in by hand

ZNP = Bubble_Plot_1(df=df, X=X, Y=Y, size=size, kolor=kolor, title=title)
ZNP.buble()
In [6]:
class Bubble_Plot_2:
    """Bubble chart with a colorbar, a bubble-size legend and a text label per bubble.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that contains every column named by the other arguments.
    X, Y : str
        Column names plotted on the x and y axes; also used as axis labels.
    size : str
        Column name whose values are passed to ``scatter`` as marker sizes (``s``).
    kolor : str
        Column name whose values are mapped to colours with the ``'RdYlGn'`` colormap.
    title : str
        Text shown as the axes title.
    title_leg : str
        Title of the bubble-size legend.
    title_bub : str
        Column name whose values are written next to each bubble.
    """

    def __init__(self, df, X, Y, size, kolor, title, title_leg, title_bub):
        self.df = df
        self.X = X
        self.Y = Y
        self.size = size
        self.kolor = kolor
        self.title = title
        # Stored under their own names; assigning them to ``self.title`` would
        # overwrite the plot title and lose the legend/label settings.
        self.title_leg = title_leg
        self.title_bub = title_bub

    def buble2(self):
        """Draw the annotated bubble chart and display it.

        All inputs are taken from the instance attributes set in ``__init__``.
        Rows whose x or y value is missing get no text label.
        """
        fig, ax = plt.subplots(figsize=(14, 7), dpi=280, facecolor='white', edgecolor='black')
        # A single scatter call is enough: its return value is the handle used
        # for both the colorbar and the size legend.
        # Other colormaps that were tried here: 'YlGn', 'PuBu', 'YlOrBr', 'RdYlGn'.
        bubbles = ax.scatter(
            self.X, self.Y, data=self.df, s=self.size, c=self.kolor,
            cmap='RdYlGn', edgecolors='grey', linewidths=0.8,
        )
        ax.set_title(self.title, fontsize=14)
        ax.set_xlabel(self.X, fontsize=12)
        ax.set_ylabel(self.Y, fontsize=12)
        fig.colorbar(bubbles, ax=ax)

        handles, labels = bubbles.legend_elements(prop="sizes", alpha=0.2)
        ax.legend(handles, labels, loc="upper left", title=self.title_leg)

        # Label every bubble. Iterating over the values (instead of indexing
        # with a running counter) works for any DataFrame index.
        has_position = self.df[self.X].notna() & self.df[self.Y].notna()
        rows = self.df.loc[has_position]
        for x_val, y_val, text in zip(rows[self.X], rows[self.Y], rows[self.title_bub]):
            ax.annotate(text, (x_val, y_val))

        plt.show()
    
import matplotlib.pyplot as plt
In [7]:
# Column selection for the annotated autos bubble chart.
df = df2
X, Y = 'horsepower', 'engine_size'
size, kolor = 'city_mpg2', 'price'
title = 'Car comparison'          # the title is typed in by hand
title_leg = 'fuel consumption'    # the legend title is typed in by hand
title_bub = 'make'                # column written next to each bubble

PRL = Bubble_Plot_2(
    df=df, X=X, Y=Y, size=size, kolor=kolor,
    title=title, title_leg=title_leg, title_bub=title_bub,
)
PRL.buble2()
 

Midwest

In [8]:
# Read the Midwest county data set from disk and preview its first rows.
# NOTE(review): absolute local path; the notebook only runs where this file exists.
df = pd.read_csv(filepath_or_buffer='/home/wojciech/Pulpit/2/midwest_filter.csv')
df.head()
Out[8]:
  PID county state area poptotal popdensity popwhite popblack popamerindian popasian percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category dot_size
0 561 ADAMS IL 0.052 66090 1270.961540 63917 1702 98 249 4.355859 63628.0 96.274777 13.151443 18.011717 11.009776 12.443812 0.0 AAR 250.944411
1 562 ALEXANDER IL 0.014 10626 759.000000 7054 3496 19 48 2.870315 10529.0 99.087145 32.244278 45.826514 27.385647 25.228976 0.0 LHR 185.781260
2 563 BOND IL 0.022 14991 681.409091 14477 429 35 16 4.488572 14235.0 94.956974 12.068844 14.036061 10.852090 12.697410 0.0 AAR 175.905385
3 564 BOONE IL 0.017 30806 1812.117650 29344 127 46 150 4.197800 30337.0 98.477569 7.209019 11.179536 5.536013 6.217047 1.0 ALU 319.823487
4 565 BROWN IL 0.018 5836 324.222222 5264 547 14 5 3.367680 4815.0 82.505140 13.520249 13.022889 11.143211 19.200000 0.0 AAR 130.442161

5 rows × 29 columns

In [9]:
# Bubble chart of the Midwest county data loaded into `df` in the cell above
# (the former `df=df` self-assignment did nothing and was dropped).
X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
# The data are Midwest counties, so the title says so.
title = 'Midwest counties: area vs. population'

PLN = Bubble_Plot_1(df, X, Y, size, kolor, title)
PLN.buble()
In [10]:
# Annotated bubble chart of the Midwest county data held in `df`
# (the former `df=df` self-assignment did nothing and was dropped).
X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
# The data are Midwest counties, so the title says so.
title = 'Midwest counties: area vs. population'  # typed in by hand
title_leg = 'dot_size'                            # legend title, typed in by hand
title_bub = 'county'                              # column written next to each bubble

KPN = Bubble_Plot_2(df, X, Y, size, kolor, title, title_leg, title_bub)
KPN.buble2()
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))
 

Diabetes

In [11]:
# Read the diabetes data set from disk and preview its first two rows.
# NOTE(review): absolute local path; the notebook only runs where this file exists.
df3 = pd.read_csv(filepath_or_buffer='/home/wojciech/Pulpit/1/diabetes.csv')
df3.head(n=2)
Out[11]:
  Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
 

Add a scaled BMI class (BMI quintile 1–5 multiplied by 70) to use as the bubble size.

In [12]:
# Bin BMI into five quantile classes (0-4), shift them to 1-5 and multiply by 70
# so the column can be used directly as scatter marker sizes.
df3['BMI_class'] = pd.qcut(df3['BMI'], q=5, labels=False).astype(int).add(1).mul(70)
In [13]:
# Column selection for the first diabetes bubble chart.
df = df3
X, Y = 'Age', 'Glucose'
size, kolor = 'BMI_class', 'BloodPressure'
title = 'Bubble Plot of Diabetes'  # the title is typed in by hand

PKP = Bubble_Plot_1(df=df, X=X, Y=Y, size=size, kolor=kolor, title=title)
PKP.buble()
In [14]:
# Bind the diabetes frame explicitly: `df=df` only worked when the previous
# cell had just been run and silently plotted whatever `df` happened to hold.
df = df3

X = 'Age'
Y = 'Glucose'
size = 'BMI_class'
kolor = 'BloodPressure'
title = 'Bubble Plot of Diabetes'  # the title is typed in by hand
title_leg = 'BMI_class'            # legend title, typed in by hand
title_bub = 'Age'                  # column written next to each bubble

PKO = Bubble_Plot_2(df, X, Y, size, kolor, title, title_leg, title_bub)
PKO.buble2()