Data plots - THE DATA SCIENCE LIBRARY http://sigmaquality.pl/category/data-plots/ Wojciech Moszczyński Mon, 13 Dec 2021 17:54:20 +0000 pl-PL hourly 1 https://wordpress.org/?v=6.8.3 https://sigmaquality.pl/wp-content/uploads/2019/02/cropped-ryba-32x32.png Data plots - THE DATA SCIENCE LIBRARY http://sigmaquality.pl/category/data-plots/ 32 32 Perfect Plots Bubble Plot https://sigmaquality.pl/data-plots/perfect-plots-bubble-plot-definitions-100420201321/ Fri, 24 Apr 2020 12:29:21 +0000 http://sigmaquality.pl/perfect-plots-bubble-plot-definitions-100420201321/   Perfect Plots: Bubble Plot Feel free to read the code on GitHub In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as [...]

Artykuł Perfect Plots Bubble Plot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
 

Perfect Plots: Bubble Plot

Feel free to read the code on GitHub

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [2]:
df2= pd.read_csv('/home/wojciech/Pulpit/1/autos.csv')
df2.head(3)
Out[2]:
  Unnamed: 0 symboling normalized_losses make fuel_type aspiration num_doors body_style drive_wheels engine_location engine_size fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg highway_mpg price
0 0 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 13495.0
1 1 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 16500.0
2 2 1 NaN alfa-romero gas std two hatchback rwd front 152 mpfi 2.68 3.47 9.0 154.0 5000.0 19 26 16500.0

3 rows × 27 columns

 

I am making a synthetic variable that is supposed to increase the bubble – fuel consumption.

In [3]:
df2['city_mpg2'] = df2['city_mpg']*30
In [4]:
class Bubble_Plot_1:
    
    def __init__(self,df,X,Y,size, kolor, title):
        self.df = df
        self.X = X
        self.Y = Y
        self.size = size
        self.kolor = kolor
        self.title = title
    
    def buble(self):
        fig = plt.figure(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    
        plt.scatter(X, Y, data=df, s=size, c=kolor, cmap='PuBu', edgecolors='grey', linewidths=0.8)
        ## cmap='YlGn','PuBu','YlOrBr','RdYlGn'
        plt.title(title, fontsize=16)
        plt.xlabel(X, fontsize=18)
        plt.ylabel(Y, fontsize=18)
        plt.colorbar()

        plt.show()    

    
    
import matplotlib.pyplot as plt
In [5]:
df=df2

X = 'horsepower'
Y = 'engine_size'
size = 'city_mpg2'
kolor = 'price'
title = 'Car comparison'  #<-- Tytuł wpisujemy z ręki

ZNP = Bubble_Plot_1(df,X,Y,size, kolor, title)
ZNP.buble()
In [6]:
class Bubble_Plot_2:
    
    def __init__(self, df, X, Y, size, kolor, title, title_leg, title_bub):
        self.df = df
        self.X = X
        self.Y = Y
        self.size = size
        self.kolor = kolor
        self.title = title
        self.title = title_leg
        self.title = title_bub
    
    def buble2(self):
        fig, ax = plt.subplots(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    
        ax.scatter(X, Y, data=df, s=size, c=kolor, cmap='RdYlGn', edgecolors='grey', linewidths=0.8)
        ax.set_title(title, fontsize=14)
        ax.set_xlabel(X, fontsize=12)
        ax.set_ylabel(Y, fontsize=12)
        ## cmap='YlGn','PuBu','YlOrBr','RdYlGn'

        ## Sztuczka żeby mieć colorbar
        AA = ax.scatter(X, Y, data=df, s=size, c=kolor, cmap='RdYlGn', edgecolors='grey', linewidths=0.1)
        plt.colorbar(AA)

        handles, labels = AA.legend_elements(prop="sizes", alpha=0.2)
        legend2 = ax.legend(handles, labels, loc="upper left", title=title_leg)
        ## sztuczka żeby mieć podpisy na kólkach
        for i, txt in enumerate(df[title_bub]):
            ax.annotate(txt, (df[X][i],df[Y] [i]))

        plt.show()  
    
import matplotlib.pyplot as plt
In [7]:
df=df2

X = 'horsepower'
Y = 'engine_size'
size = 'city_mpg2'
kolor = 'price'
title = 'Car comparison'          ##<- tytuł wpisujemy z ręki
title_leg = 'fuel consumption'    ##<- tytuł wpisujemy z ręki
title_bub = 'make'


PRL = Bubble_Plot_2(df,X,Y,size, kolor, title, title_leg,title_bub)
PRL.buble2()
 

Midwest

In [8]:
df = pd.read_csv('/home/wojciech/Pulpit/2/midwest_filter.csv')
df.head()
Out[8]:
  PID county state area poptotal popdensity popwhite popblack popamerindian popasian percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category dot_size
0 561 ADAMS IL 0.052 66090 1270.961540 63917 1702 98 249 4.355859 63628.0 96.274777 13.151443 18.011717 11.009776 12.443812 0.0 AAR 250.944411
1 562 ALEXANDER IL 0.014 10626 759.000000 7054 3496 19 48 2.870315 10529.0 99.087145 32.244278 45.826514 27.385647 25.228976 0.0 LHR 185.781260
2 563 BOND IL 0.022 14991 681.409091 14477 429 35 16 4.488572 14235.0 94.956974 12.068844 14.036061 10.852090 12.697410 0.0 AAR 175.905385
3 564 BOONE IL 0.017 30806 1812.117650 29344 127 46 150 4.197800 30337.0 98.477569 7.209019 11.179536 5.536013 6.217047 1.0 ALU 319.823487
4 565 BROWN IL 0.018 5836 324.222222 5264 547 14 5 3.367680 4815.0 82.505140 13.520249 13.022889 11.143211 19.200000 0.0 AAR 130.442161

5 rows × 29 columns

In [9]:
df=df

X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
title = 'Africa cities' 

PLN = Bubble_Plot_1(df,X,Y,size, kolor, title)  
PLN.buble()
In [10]:
df=df

X = 'area'
Y = 'poptotal'
size = 'dot_size'
kolor = 'poptotal'
title = 'Africa'          ##<- tytuł wpisujemy z ręki
title_leg = 'dot_size'    ##<- tytuł wpisujemy z ręki
title_bub = 'county'

KPN = Bubble_Plot_2(df,X,Y,size, kolor, title, title_leg,title_bub )
KPN.buble2()
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
/home/wojciech/anaconda3/lib/python3.7/site-packages/matplotlib/collections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))
 

Diabetes

In [11]:
df3= pd.read_csv('/home/wojciech/Pulpit/1/diabetes.csv')
df3.head(2)
Out[11]:
  Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
 

Adds BMI indicator amplifier

In [12]:
df3['BMI_class'] = ((pd.qcut(df3['BMI'],5, labels=False).astype(int))+1)*70
In [13]:
df=df3

X = 'Age'
Y = 'Glucose'
size = 'BMI_class'
kolor = 'BloodPressure'
title = 'Bubble Plot of Diabetes'  #<-- Tytuł wpisujemy z ręki

PKP = Bubble_Plot_1(df,X,Y,size, kolor, title)
PKP.buble()
In [14]:
df=df

X = 'Age'
Y = 'Glucose'
size = 'BMI_class'
kolor = 'BloodPressure'
title = 'Bubble Plot of Diabetes'  #<-- Tytuł wpisujemy z ręki
title_leg = 'BMI_class'    ##<- tytuł wpisujemy z ręki
title_bub = 'Age'

PKO = Bubble_Plot_2(df,X,Y,size, kolor, title, title_leg,title_bub )
PKO.buble2()

Artykuł Perfect Plots Bubble Plot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots_ Matrix of corelation https://sigmaquality.pl/data-plots/perfect-plots_-matrix-of-corelation-281120190000/ Fri, 24 Apr 2020 09:58:09 +0000 http://sigmaquality.pl/perfect-plots_-matrix-of-corelation-281120190000/ Feel free to read the code on GitHub Source of data: https://archive.ics.uci.edu/ml/datasets/combined+cycle+power+plant Combined Cycle Power Plant Data Set¶ Data Set Information:¶ The dataset contains 9568 [...]

Artykuł Perfect Plots_ Matrix of corelation pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub

Combined Cycle Power Plant Data Set

Data Set Information:

The dataset contains 9568 data points collected from a Combined Cycle Power Plant over 6 years (2006-2011), when the power plant was set to work with full load. Features consist of hourly average ambient variables Temperature (T), Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V) to predict the net hourly electrical energy output (EP) of the plant.
A combined cycle power plant (CCPP) is composed of gas turbines (GT), steam turbines (ST) and heat recovery steam generators. In a CCPP, the electricity is generated by gas and steam turbines, which are combined in one cycle, and is transferred from one turbine to another. While the Vacuum is colected from and has effect on the Steam Turbine, he other three of the ambient variables effect the GT performance.
For comparability with our baseline studies, and to allow 5×2 fold statistical tests be carried out, we provide the data shuffled five times. For each shuffling 2-fold CV is carried out and the resulting 10 measurements are used for statistical testing.
We provide the data both in .ods and in .xlsx formats.

Attribute Information:

Features consist of hourly average ambient variables

  • Temperature (T) in the range 1.81°C and 37.11°C,
  • Ambient Pressure (AP) in the range 992.89-1033.30 milibar,
  • Relative Humidity (RH) in the range 25.56
  • Exhaust Vacuum (V) in teh range 25.36-81.56 cm Hg
  • Net hourly electrical energy output (EP) 420.26-495.76 MW
    The averages are taken from various sensors located around the plant that record the ambient variables every second. The variables are given without normalization.
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


df = pd.read_csv('/home/wojciech/Pulpit/1/Folds5x2_pp.csv')
del df['Unnamed: 0']
df.columns = ['Temperature', 'Exhaust_Vacuum', 'Ambient_Pressure', 'Relative_Humidity', 'Energy_output']
df.sample(3)
Out[1]:
Temperature Exhaust_Vacuum Ambient_Pressure Relative_Humidity Energy_output
7071 21.98 59.39 1015.25 84.52 446.79
1815 14.12 42.86 1011.84 88.29 471.86
5227 23.14 58.18 1008.89 81.82 444.51
In [2]:


sns.set(style="ticks")
corr = df.corr()

mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True

f, ax = plt.subplots(figsize=(12, 6))

cmap = sns.diverging_palette(180, 90, as_cmap=True)

sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,annot=True,
            square=True, linewidths=.9, cbar_kws={"shrink": .9})
Out[2]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd267636d10>
In [3]:
df2 = pd.read_csv('/home/wojciech/Pulpit/1/bank.csv')
del df2['Unnamed: 0']
del df2['Unnamed: 0.1']
df2.head()
Out[3]:
age job marital education default housing loan contact month day_of_week campaign pdays previous poutcome emp_var_rate cons_price_idx cons_conf_idx euribor3m nr_employed y
0 44 blue-collar married basic.4y unknown yes no cellular aug thu 1 999 0 nonexistent 1.4 93.444 -36.1 4.963 5228.1 0
1 53 technician married unknown no no no cellular nov fri 1 999 0 nonexistent -0.1 93.200 -42.0 4.021 5195.8 0
2 28 management single university.degree no yes no cellular jun thu 3 6 2 success -1.7 94.055 -39.8 0.729 4991.6 1
3 39 services married high.school no no no cellular apr fri 2 999 0 nonexistent -1.8 93.075 -47.1 1.405 5099.1 0
4 55 retired married basic.4y no yes no cellular aug fri 1 3 1 success -2.9 92.201 -31.4 0.869 5076.2 1

5 rows × 21 columns

In [4]:
sns.set(style="ticks")

corr = df2.corr()

mask = np.zeros_like(corr, dtype=np.bool)
mask[np.triu_indices_from(mask)] = True

f, ax = plt.subplots(figsize=(22, 10))
cmap = sns.diverging_palette(580, 10, as_cmap=True)

sns.heatmap(corr, mask=mask, cmap=cmap, vmax=0.3, center=0.03,annot=True,
            square=True, linewidths=.9, cbar_kws={"shrink": 0.8})
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd2910ce590>

Definition

In [5]:
def matrix_plot(df,title):

    sns.set(style="ticks")

    corr = df2.corr()

    mask = np.zeros_like(corr, dtype=np.bool)
    mask[np.triu_indices_from(mask)] = True

    f, ax = plt.subplots(figsize=(22, 10))
    #cmap = sns.diverging_palette(580, 10, as_cmap=True)
    cmap = sns.diverging_palette(180, 90, as_cmap=True) #Inna paleta barw

    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=0.3, center=0.03,annot=True,
                square=True, linewidths=.9, cbar_kws={"shrink": 0.8})
    plt.xticks(rotation=90)
    plt.title(title,fontsize=22,color='#0c343d',alpha=0.5)
    plt.show
In [6]:
matrix_plot(df2, 'Perfect Plots: Matrix of corelation')

Definition by class

In [7]:
class mx_plot:
    
    def __init__(self,df,title):
        self.df = df
        self.title = title
    
    def matrix(self):
        sns.set(style="ticks")
        corr = df2.corr()
        mask = np.zeros_like(corr, dtype=np.bool)
        mask[np.triu_indices_from(mask)] = True

        f, ax = plt.subplots(figsize=(22, 10))
        #cmap = sns.diverging_palette(580, 10, as_cmap=True)
        cmap = sns.diverging_palette(580, 10, as_cmap=True) #Inna paleta barw

        sns.heatmap(corr, mask=mask, cmap=cmap, vmax=0.3, center=0.03,annot=True,
                square=True, linewidths=.9, cbar_kws={"shrink": 0.8})
        plt.xticks(rotation=90)
        plt.title(title,fontsize=22,color='#0c343d',alpha=0.5)
        plt.show
    
import seaborn as sns
In [11]:
df=df2
title = 'Perfect Plots: Matrix of corelation'

PKP = mx_plot(df2,title)
PKP.matrix()

Artykuł Perfect Plots_ Matrix of corelation pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect plot Joyplot https://sigmaquality.pl/data-plots/perfect-plot-joyplot-100420201543/ Fri, 24 Apr 2020 09:41:15 +0000 http://sigmaquality.pl/perfect-plot-joyplot-100420201543/ Feel free to read the code on GitHub In [1]: import joypy import pandas as pd import matplotlib.pyplot as plt Car market analysis¶ Source of [...]

Artykuł Perfect plot Joyplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub

In [1]:

import joypy
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
df= pd.read_csv('/home/wojciech/Pulpit/1/autos.csv')
df.head()
Out[3]:
Unnamed: 0 symboling normalized_losses make fuel_type aspiration num_doors body_style drive_wheels engine_location engine_size fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg highway_mpg price
0 0 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 13495.0
1 1 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 16500.0
2 2 1 NaN alfa-romero gas std two hatchback rwd front 152 mpfi 2.68 3.47 9.0 154.0 5000.0 19 26 16500.0
3 3 2 164.0 audi gas std four sedan fwd front 109 mpfi 3.19 3.40 10.0 102.0 5500.0 24 30 13950.0
4 4 2 164.0 audi gas std four sedan 4wd front 136 mpfi 3.19 3.40 8.0 115.0 5500.0 18 22 17450.0

5 rows × 27 columns

In [4]:
def N_plots(df,x1,x2,by,title, x_title):

    plt.figure(dpi= 380)

    fig, axes = joypy.joyplot(df, column=[x1, x2], by=by, ylim='own', figsize=(12,8), legend=True, color=['#f4cccc', '#0c343d'], alpha=0.4)
    # color=['#76a5af', '#134f5c']
    # color=['#f4cccc', '#0c343d']
    # color=['#a4c2f4', '#1c4587']
    #color=['#e06666', '#d9d9d9']
    #color=['#e06666', '#434343']
    #color=['#b6d7a8','#6aa84f']
    
    # Decoration
    plt.title(title, fontsize=32, color='#d0e0e3', alpha=0.9)
    plt.rc("font", size=20)
    plt.xlabel(x_title,  fontsize=16, color='darkred', alpha=1)
    #plt.ylabel('Data Scientist', fontsize=26,  color='grey', alpha=0.8)

    plt.show
In [5]:
df4 = df[['body_style','highway_mpg','city_mpg']]
df4.head()
Out[5]:
body_style highway_mpg city_mpg
0 convertible 27 21
1 convertible 27 21
2 hatchback 26 19
3 sedan 30 24
4 sedan 22 18
In [6]:
df=df
x1='highway_mpg'
x2='city_mpg'
by='body_style'
title = 'Fuel consumption by body style'
x_title = 'Fuel consumption'

N_plots(df,x1,x2,by,title, x_title)
<Figure size 2280x1520 with 0 Axes>

Joyplot Plot by class designer

In [11]:
class N_plot:
    
    def __init__(self,df,x1,x2,by,title, x_title):
        self.df = df
        self.x1 = x1
        self.x2 = x2
        self.by = by
        self.title = title
        self.x_title = x_title
    
    def plot(self):
        plt.figure(dpi= 380)
        fig, axes = joypy.joyplot(df, column=[x1, x2], by=by, ylim='own', figsize=(12,8), legend=True, color=['#e06666', '#d9d9d9'], alpha=0.4)
        plt.title(title, fontsize=32, color='#d0e0e3', alpha=0.9)
        plt.rc("font", size=20)
        plt.xlabel(x_title,  fontsize=16, color='darkred', alpha=1)
    
import matplotlib.pyplot as plt
plt.figure(dpi= 380)
    #color=['#76a5af', '#134f5c']
    #color=['#f4cccc', '#0c343d']
    #color=['#a4c2f4', '#1c4587']
    #color=['#e06666', '#d9d9d9']
    #color=['#e06666', '#434343']
    #color=['#b6d7a8','#6aa84f']
Out[11]:
<Figure size 2280x1520 with 0 Axes>
<Figure size 2280x1520 with 0 Axes>
In [12]:
df=df
x1='highway_mpg'
x2='city_mpg'
by='body_style'
title = 'Fuel consumption by body style'
x_title = 'Fuel consumption'

kot = N_plot(df,x1,x2,by,title, x_title)
In [13]:
kot.plot()
<Figure size 2280x1520 with 0 Axes>

Titanic disaster

We ought to find which passengers have chance to survive according to their affiliation to the established groups.

Source of data: https://www.kaggle.com/shivamp629/traincsv

In [14]:
df2 = pd.read_csv('/home/wojciech/Pulpit/1/kaggletrain.csv')
df2.head(3)
Out[14]:
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [5]:
df2['Age'].head()
Out[5]:
0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: Age, dtype: float64
In [16]:
AA = df2.pivot_table(index=['Name','Pclass'], columns='Sex', values='Age').reset_index()
AA.head()
Out[16]:
Sex Name Pclass female male
0 Abbing, Mr. Anthony 3 NaN 42.0
1 Abbott, Mr. Rossmore Edward 3 NaN 16.0
2 Abbott, Mrs. Stanton (Rosa Hunt) 3 35.0 NaN
3 Abelson, Mr. Samuel 2 NaN 30.0
4 Abelson, Mrs. Samuel (Hannah Wizosky) 2 28.0 NaN
In [18]:
df=AA
x1='female'
x2='male'
by='Pclass'
title = 'Titanic disaster: age distribution of casualties by the class'
x_title = 'Age of passengers'

pks = N_plot(df,x1,x2,by,title, x_title)
pks.plot()
<Figure size 2280x1520 with 0 Axes>
In [20]:
BB = df2.pivot_table(index=['Name','Survived'], columns='Sex', values='Age').reset_index()
BB.head()
Out[20]:
Sex Name Survived female male
0 Abbing, Mr. Anthony 0 NaN 42.0
1 Abbott, Mr. Rossmore Edward 0 NaN 16.0
2 Abbott, Mrs. Stanton (Rosa Hunt) 1 35.0 NaN
3 Abelson, Mr. Samuel 0 NaN 30.0
4 Abelson, Mrs. Samuel (Hannah Wizosky) 1 28.0 NaN
In [21]:
df=BB
x1='female'
x2='male'
by='Survived'
title = 'Titanic disaster: age distribution of casualties by the Survived'
x_title = 'Age of passengers'

ZHP = N_plot(df,x1,x2,by,title, x_title)
ZHP.plot()
<Figure size 2280x1520 with 0 Axes>
In [23]:
df3= pd.read_csv('/home/wojciech/Pulpit/1/drinksbycountry.csv')
df3.head()
Out[23]:
Unnamed: 0 country beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol continent
0 0 Afghanistan 0 0 0 0.0 Asia
1 1 Albania 89 132 54 4.9 Europe
2 2 Algeria 25 0 14 0.7 Africa
3 3 Andorra 245 138 312 12.4 Europe
4 4 Angola 217 57 45 5.9 Africa
In [27]:
class N_plot3:
    
    def __init__(self,df,x1,x2,x3, by,title, x_title):
        self.df = df
        self.x1 = x1
        self.x2 = x2
        self.x3 = x3
        self.by = by
        self.title = title
        self.x_title = x_title
    
    def plot(self):
        plt.figure(dpi= 380)
        fig, axes = joypy.joyplot(df, column=[x1,x2,x3], by=by, ylim='own', figsize=(12,8), legend=True, color=['#b6d7a8','#1c4587', '#6aa84f'], alpha=0.4)
        plt.title(title, fontsize=32, color='#d0e0e3', alpha=0.9)
        plt.rc("font", size=20)
        plt.xlabel(x_title,  fontsize=16, color='darkred', alpha=1)
    
import matplotlib.pyplot as plt
plt.figure(dpi= 380)
    #color=['#76a5af', '#134f5c']
    #color=['#f4cccc', '#0c343d']
    #color=['#a4c2f4', '#1c4587']
    #color=['#e06666', '#d9d9d9']
    #color=['#e06666', '#434343']
    #color=['#b6d7a8','#6aa84f']
Out[27]:
<Figure size 2280x1520 with 0 Axes>
<Figure size 2280x1520 with 0 Axes>
In [30]:
df=df3
x1='beer_servings'
x2='spirit_servings'
x3='wine_servings'
by='continent'
title = 'Alcohol consumption by continents'
x_title = 'The level o consumptions'

PKO = N_plot3(df,x1,x2,x3,by,title, x_title)
PKO.plot()
<Figure size 2280x1520 with 0 Axes>

World Happiness Report

Source of data: https://data.world/promptcloud/world-happiness-report-2019

In [31]:
df4 = pd.read_csv('/home/wojciech/Pulpit/1/WorldHappinessReport.csv')
df4.head(3)
Out[31]:
Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
0 0 Afghanistan Southern Asia 153.0 3.575 0.31982 0.30285 0.30335 0.23414 0.09719 0.36510 1.95210 2015.0
1 1 Albania Central and Eastern Europe 95.0 4.959 0.87867 0.80434 0.81325 0.35733 0.06413 0.14272 1.89894 2015.0
2 2 Algeria Middle East and Northern Africa 68.0 5.605 0.93929 1.07772 0.61766 0.28579 0.17383 0.07822 2.43209 2015.0
In [32]:
df4['Year'].value_counts()
Out[32]:
2017.0    164
2016.0    164
2015.0    164
Name: Year, dtype: int64
In [34]:
CC = df4[df4['Year']==2017]
CC.head(3)
Out[34]:
Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
330 330 Afghanistan Southern Asia 141.0 3.794 0.401477 0.581543 0.180747 0.106180 0.061158 0.311871 2.150801 2017.0
331 331 Albania Central and Eastern Europe 109.0 4.644 0.996193 0.803685 0.731160 0.381499 0.039864 0.201313 1.490442 2017.0
332 332 Algeria Middle East and Northern Africa 53.0 5.872 1.091864 1.146217 0.617585 0.233336 0.146096 0.069437 2.567604 2017.0
In [36]:
df=CC
x1='Freedom'
x2='Trust (Government Corruption)'
by='Region'
title = 'World Happiness Report'
x_title = 'Indicator'

ZNP = N_plot(df,x1,x2,by,title, x_title)
ZNP.plot()
<Figure size 2280x1520 with 0 Axes>

Banking marketing

Analysis of the categorical results.
Source of data: https://archive.ics.uci.edu/ml/machine-learning-databases/00222/

In [37]:
df5 = pd.read_csv('/home/wojciech/Pulpit/1/bank.csv')
df5.head(3)
Out[37]:
Unnamed: 0 Unnamed: 0.1 age job marital education default housing loan contact campaign pdays previous poutcome emp_var_rate cons_price_idx cons_conf_idx euribor3m nr_employed y
0 0 0 44 blue-collar married basic.4y unknown yes no cellular 1 999 0 nonexistent 1.4 93.444 -36.1 4.963 5228.1 0
1 1 1 53 technician married unknown no no no cellular 1 999 0 nonexistent -0.1 93.200 -42.0 4.021 5195.8 0
2 2 2 28 management single university.degree no yes no cellular 3 6 2 success -1.7 94.055 -39.8 0.729 4991.6 1

3 rows × 23 columns

In [38]:
FF = df5.pivot_table(index=['Unnamed: 0','marital'], columns='y', values='age').reset_index()
FF.head()
Out[38]:
y Unnamed: 0 marital 0 1
0 0 married 44.0 NaN
1 1 married 53.0 NaN
2 2 single NaN 28.0
3 3 married 39.0 NaN
4 4 married NaN 55.0
In [40]:
df=FF
x1=0
x2=1
by='marital'
title = 'Customer age structure'
x_title = 'customer age'

KLD = N_plot(df,x1,x2,by,title, x_title)
KLD.plot()
<Figure size 2280x1520 with 0 Axes>

Artykuł Perfect plot Joyplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plot: Classification charts https://sigmaquality.pl/data-plots/perfect-plot-classification-charts-280320200859/ Sat, 28 Mar 2020 08:01:30 +0000 http://sigmaquality.pl/perfect-plot-classification-charts-280320200859/ Feel free to read the code on GitHub data source: https://archive.ics.uci.edu/ml/datasets/Air+Quality In [1]: import numpy as np import pandas as pd import seaborn as sns import [...]

Artykuł Perfect Plot: Classification charts pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
np.random.seed(123)
In [2]:
df = pd.read_csv ('/home/wojciech/Pulpit/1/AirQualityUCI.csv', sep=';')
df.head(3)
Out[2]:
Date Time CO(GT) PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC) NOx(GT) PT08.S3(NOx) NO2(GT) PT08.S4(NO2) PT08.S5(O3) T RH AH Unnamed: 15 Unnamed: 16
0 10/03/2004 18.00.00 2,6 1360.0 150.0 11,9 1046.0 166.0 1056.0 113.0 1692.0 1268.0 13,6 48,9 0,7578 NaN NaN
1 10/03/2004 19.00.00 2 1292.0 112.0 9,4 955.0 103.0 1174.0 92.0 1559.0 972.0 13,3 47,7 0,7255 NaN NaN
2 10/03/2004 20.00.00 2,2 1402.0 88.0 9,0 939.0 131.0 1140.0 114.0 1555.0 1074.0 11,9 54,0 0,7502 NaN NaN

Deletes the -200 variable indicating a data error

In [3]:
df[['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH']] = df[['CO(GT)', 'PT08.S1(CO)', 'NMHC(GT)', 'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH']].replace(-200,np.NaN)

Deletes invalid records

In [4]:
del df['NMHC(GT)']
del df['Unnamed: 15']
del df['Unnamed: 16']

print(df.shape)
df.isnull().sum()
df = df.dropna(how='any')
print(df.shape)
print(df.isnull().sum())
(9471, 14)
(7393, 14)
Date             0
Time             0
CO(GT)           0
PT08.S1(CO)      0
C6H6(GT)         0
PT08.S2(NMHC)    0
NOx(GT)          0
PT08.S3(NOx)     0
NO2(GT)          0
PT08.S4(NO2)     0
PT08.S5(O3)      0
T                0
RH               0
AH               0
dtype: int64

Replace variables with numeric values

In [5]:
print(df.dtypes)
Date              object
Time              object
CO(GT)            object
PT08.S1(CO)      float64
C6H6(GT)          object
PT08.S2(NMHC)    float64
NOx(GT)          float64
PT08.S3(NOx)     float64
NO2(GT)          float64
PT08.S4(NO2)     float64
PT08.S5(O3)      float64
T                 object
RH                object
AH                object
dtype: object
In [6]:
df['CO(GT)'] = df['CO(GT)'].str.replace(',', '.')
In [7]:
df['C6H6(GT)'] = df['C6H6(GT)'].str.replace(',', '.')
In [8]:
df['T'] = df['T'].str.replace(',', '.')
In [9]:
df['RH'] = df['RH'].str.replace(',', '.')
In [10]:
df['AH'] = df['AH'].str.replace(',', '.')
In [11]:
df[['CO(GT)', 'PT08.S1(CO)',  'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH']] = df[['CO(GT)', 'PT08.S1(CO)', 'C6H6(GT)', 'PT08.S2(NMHC)',
       'NOx(GT)', 'PT08.S3(NOx)', 'NO2(GT)', 'PT08.S4(NO2)', 'PT08.S5(O3)',
       'T', 'RH', 'AH']].astype(float)

We choose the test kit

In [12]:
df2= df[['PT08.S4(NO2)','PT08.S3(NOx)','PT08.S2(NMHC)','AH','C6H6(GT)']]

Encodes the resulting categorical variable – C6H6 (GT)

In [13]:
print('max:',df2['C6H6(GT)'].max())
print('min:',df2['C6H6(GT)'].min())

sns.distplot(np.array(df['C6H6(GT)']),color='#999999')
max: 63.7
min: 0.1
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa961d2d750>
In [14]:
df2['C6H6(GT)'] = df['C6H6(GT)'].apply(lambda x: 1 if x > 10 else 0)
df2['C6H6(GT)'].value_counts(dropna = False, normalize=True).plot(kind='pie',colors=['#b7b7b7','#ea9999'])
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa961e79650>
In [15]:
X = df2.drop('C6H6(GT)', axis=1) 
y = df2['C6H6(GT)']  

Data for the chart

In [16]:
y= y.to_frame()
y.head(5)
Out[16]:
C6H6(GT)
0 1
1 0
2 0
3 0
4 0
In [17]:
df2.head(3)
Out[17]:
PT08.S4(NO2) PT08.S3(NOx) PT08.S2(NMHC) AH C6H6(GT)
0 1692.0 1056.0 1046.0 0.7578 1
1 1559.0 1174.0 955.0 0.7255 0
2 1555.0 1140.0 939.0 0.7502 0

Classification chart

In [18]:
fig = plt.figure(figsize = (20, 25))
j = 0
for i in df2.columns:
    plt.subplot(6, 4, j+1)
    j = 1+j
    sns.distplot(df2[i][y['C6H6(GT)']==0], color='#999999', label = '0')
    sns.distplot(df2[i][y['C6H6(GT)']==1], color='#ff0000', label = '1')
    plt.legend(loc='best',fontsize=10)
fig.suptitle('Air pollution with C6H6 substance',fontsize=34,color='#ff0000',alpha=0.3)
fig.tight_layout()
fig.subplots_adjust(top=0.95)
plt.show()

Definicja

In [19]:
def scientist_plot(data, y, AAA, Title):
    fig = plt.figure(figsize = (20, 25))
    j = 0
    for i in df2.columns:
        plt.subplot(6, 4, j+1)
        j = 1+j
        sns.distplot(data[i][y[AAA]==0], color='#ffff00', label = 'acceptable norm')
        sns.distplot(data[i][y[AAA]==1], color='#4a86e8', label = 'norm exceeded')
        plt.legend(loc='best',fontsize=10)
    fig.suptitle(Title,fontsize=34,color='#4a86e8',alpha=0.5)
    fig.tight_layout()
    fig.subplots_adjust(top=0.95)
    plt.show()
In [20]:
scientist_plot(df2, y, 'C6H6(GT)','Statistical characteristics of exogenous variables')

Dispersion matrix for classification

In [21]:
fig = plt.figure(figsize = (20, 25))
kot = ['#999999','#ff0000']
sns.pairplot(data=df2[['PT08.S4(NO2)','PT08.S3(NOx)','PT08.S2(NMHC)','AH','C6H6(GT)']], hue='C6H6(GT)', dropna=True, height=2, palette=kot)
fig.suptitle('Air pollution with C6H6 substance',fontsize=34,color='#ff0000',alpha=0.3)
fig.tight_layout()
fig.subplots_adjust(top=0.95)
plt.show()
<Figure size 1440x1800 with 0 Axes>
In [ ]:
 
In [ ]:
 
In [ ]:
 

Artykuł Perfect Plot: Classification charts pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Bubble Plot https://sigmaquality.pl/data-plots/perfect-plots_-bubble-plot/ Thu, 07 Nov 2019 18:26:00 +0000 http://sigmaquality.pl/perfect-plots_-bubble-plot/   Feel free to read the code on GitHub In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np   Autos [...]

Artykuł Perfect Plots: Bubble Plot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
 

Feel free to read the code on GitHub

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [2]:
df2= pd.read_csv('c:/1/autos.csv')
df2.head()
Out[2]:
  Unnamed: 0 symboling normalized_losses make fuel_type aspiration num_doors body_style drive_wheels engine_location engine_size fuel_system bore stroke compression_ratio horsepower peak_rpm city_mpg highway_mpg price
0 0 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 13495.0
1 1 3 NaN alfa-romero gas std two convertible rwd front 130 mpfi 3.47 2.68 9.0 111.0 5000.0 21 27 16500.0
2 2 1 NaN alfa-romero gas std two hatchback rwd front 152 mpfi 2.68 3.47 9.0 154.0 5000.0 19 26 16500.0
3 3 2 164.0 audi gas std four sedan fwd front 109 mpfi 3.19 3.40 10.0 102.0 5500.0 24 30 13950.0
4 4 2 164.0 audi gas std four sedan 4wd front 136 mpfi 3.19 3.40 8.0 115.0 5500.0 18 22 17450.0

5 rows × 27 columns

In [3]:
fig = plt.figure(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    

plt.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
plt.title("Bubble Plot of Autos Arean(color: 'price & size: 'city_mpg')", fontsize=16)
plt.xlabel('horsepower', fontsize=18)
plt.ylabel('engine_size', fontsize=18)
plt.colorbar()

plt.show()    
In [4]:
fig, ax = plt.subplots(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    

ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
ax.set_title("Bubble Plot of Autos Arean(color: 'price & size: 'engine_size')", fontsize=16)
ax.set_xlabel('horsepower', fontsize=18)
ax.set_ylabel('engine_size', fontsize=18)


## Sztuczka żeby mieć colorbar
AA = ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
plt.colorbar(AA)


### DRUGI SPOSÓB
#im = ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
#fig.colorbar(im, ax=ax)

handles, labels = AA.legend_elements(prop="sizes", alpha=0.6)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

## sztuczka żeby mieć podpisy na kólkach
for i, txt in enumerate(df2['make']):
    ax.annotate(txt, (df2['horsepower'][i],df2['engine_size'] [i]))

plt.show()  
 

Midwest

In [5]:
df = pd.read_csv('c:/2/midwest_filter.csv')
df.head()
Out[5]:
  PID county state area poptotal popdensity popwhite popblack popamerindian popasian percprof poppovertyknown percpovertyknown percbelowpoverty percchildbelowpovert percadultpoverty percelderlypoverty inmetro category dot_size
0 561 ADAMS IL 0.052 66090 1270.961540 63917 1702 98 249 4.355859 63628.0 96.274777 13.151443 18.011717 11.009776 12.443812 0.0 AAR 250.944411
1 562 ALEXANDER IL 0.014 10626 759.000000 7054 3496 19 48 2.870315 10529.0 99.087145 32.244278 45.826514 27.385647 25.228976 0.0 LHR 185.781260
2 563 BOND IL 0.022 14991 681.409091 14477 429 35 16 4.488572 14235.0 94.956974 12.068844 14.036061 10.852090 12.697410 0.0 AAR 175.905385
3 564 BOONE IL 0.017 30806 1812.117650 29344 127 46 150 4.197800 30337.0 98.477569 7.209019 11.179536 5.536013 6.217047 1.0 ALU 319.823487
4 565 BROWN IL 0.018 5836 324.222222 5264 547 14 5 3.367680 4815.0 82.505140 13.520249 13.022889 11.143211 19.200000 0.0 AAR 130.442161

5 rows × 29 columns

In [6]:
# Plot
fig = plt.figure(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    
plt.scatter('area', 'poptotal', data=df, s='dot_size', c='popdensity', cmap='Reds', edgecolors='blue', linewidths=0.8)
plt.title("Bubble Plot of PopTotal vs Arean(color: 'popdensity' & size: 'dot_size' - both are numeric columns in midwest)", fontsize=16)
plt.xlabel('Area', fontsize=18)
plt.ylabel('Poptotal', fontsize=18)
plt.colorbar()
plt.show()   
In [7]:
fig, ax = plt.subplots(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    

ax.scatter('area', 'poptotal', data=df, s='dot_size', c='popdensity', cmap='YlGn', edgecolors='blue', linewidths=0.8)
ax.set_title("Bubble Plot of PopTotal vs Arean color: 'popdensity' & size: 'dot_size'", fontsize=16)
ax.set_xlabel('Area', fontsize=18)
ax.set_ylabel('Poptotal', fontsize=18)
   


## Sztuczka żeby mieć colorbar
BB = ax.scatter('area', 'poptotal', data=df, s='dot_size', c='popdensity', cmap='YlGn', edgecolors='blue', linewidths=0.8)
plt.colorbar(BB)


### DRUGI SPOSÓB
#im = ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
#fig.colorbar(im, ax=ax)

### legenda do wielkości kółek
handles, labels = BB.legend_elements(prop="sizes", alpha=0.6)
legend = ax.legend(handles, labels, loc="lower right", title="Sizes")

## sztuczka żeby mieć podpisy na kólkach
for i, txt in enumerate(df['county']):
    ax.annotate(txt, (df['area'][i],df['poptotal'] [i]))

plt.show()  
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:995: RuntimeWarning: invalid value encountered in greater_equal
  cond = ((label_values >= func(arr).min()) &
C:ProgramDataAnaconda3libsite-packagesmatplotlibcollections.py:996: RuntimeWarning: invalid value encountered in less_equal
  (label_values <= func(arr).max()))
 

WorldHappinessReport

Source of data: https://worldhappiness.report/download/

 

The best plots appear when we combine various data!

In [8]:
df3= pd.read_csv('c:/1/WorldHappinessReport.csv')
df3 = df3[df3['Year']==2017]
df3.tail(2)
Out[8]:
  Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
493 493 Zambia Sub-Saharan Africa 116.0 4.514 0.636407 1.003187 0.257836 0.461603 0.078214 0.249580 1.826705 2017.0
494 494 Zimbabwe Sub-Saharan Africa 138.0 3.875 0.375847 1.083096 0.196764 0.336384 0.095375 0.189143 1.597970 2017.0
In [9]:
df4 = pd.read_csv('c:/1/WorldPopulation.csv')
df4.head(2)
Out[9]:
  Unnamed: 0 Country Name Country Code 1961 1962 1963 1964 1965 1966 1967 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
0 0 Aruba ABW 54211.0 55438.0 56225.0 56695.0 57032.0 57360.0 57715.0 101353.0 101453.0 101669.0 102053.0 102577.0 103187.0 103795.0 104341.0 104822.0 105264.0
1 1 Afghanistan AFG 8996351.0 9166764.0 9345868.0 9533954.0 9731361.0 9938414.0 10152331.0 27294031.0 28004331.0 28803167.0 29708599.0 30696958.0 31731688.0 32758020.0 33736494.0 34656032.0 35530081.0

2 rows × 61 columns

 

Only Africa and only 2017.

In [10]:
D3 = df4.set_index('Country Name')['2017'].to_dict()
#D3
In [11]:
df3['Population2017'] = df3['Country'].map(D3) 
df3['Population2017'] = df3['Population2017']/100000
In [12]:
df3.isnull().sum()
df3 = df3.dropna(how='any')
df3.isnull().sum()
Out[12]:
Unnamed: 0                       0
Country                          0
Region                           0
Happiness Rank                   0
Happiness Score                  0
Economy (GDP per Capita)         0
Family                           0
Health (Life Expectancy)         0
Freedom                          0
Trust (Government Corruption)    0
Generosity                       0
Dystopia Residual                0
Year                             0
Population2017                   0
dtype: int64
In [13]:
kot = ['Sub-Saharan Africa','Middle East and Northern Africa']
AFR = df3[df3['Region'].isin(kot)]
AFR.head(2)
Out[13]:
  Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year Population2017
332 332 Algeria Middle East and Northern Africa 53.0 5.872 1.091864 1.146217 0.617585 0.233336 0.146096 0.069437 2.567604 2017.0 406.06052
333 333 Angola Sub-Saharan Africa 140.0 3.795 0.858428 1.104412 0.049869 0.000000 0.069720 0.097926 1.614482 2017.0 288.13463
In [14]:
AFR.to_csv('c:/8/AfricaHappinessReport2017.csv')
df10 = pd.read_csv('c:/8/AfricaHappinessReport2017.csv')
df10.head(2)
Out[14]:
  Unnamed: 0 Unnamed: 0.1 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year Population2017
0 332 332 Algeria Middle East and Northern Africa 53.0 5.872 1.091864 1.146217 0.617585 0.233336 0.146096 0.069437 2.567604 2017.0 406.06052
1 333 333 Angola Sub-Saharan Africa 140.0 3.795 0.858428 1.104412 0.049869 0.000000 0.069720 0.097926 1.614482 2017.0 288.13463
In [15]:
fig, ax = plt.subplots(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    

ax.scatter('Happiness Score', 'Freedom', data=df10, s='Population2017', c='Freedom', cmap='RdYlGn', edgecolors='grey', linewidths=0.8)
ax.set_title("AFRICA 2017 Happiness & Freedomn(color: 'Economy (GDP per Capita)' & size: 'Population2017')", fontsize=16)
ax.set_xlabel('Happiness Score', fontsize=18)
ax.set_ylabel('Freedom', fontsize=18)


## Sztuczka żeby mieć colorbar
CC = ax.scatter('Happiness Score', 'Freedom', data=df10, s='Population2017', c='Freedom', cmap='RdYlGn', edgecolors='grey', linewidths=0.8)
plt.colorbar(CC)


### DRUGI SPOSÓB
#im = ax.scatter('horsepower', 'engine_size', data=df2, s='engine_size', c='price', cmap='PuBu', edgecolors='grey', linewidths=0.8)
#fig.colorbar(im, ax=ax)

### Sztuczka, żeby mieć legende do size - nie działa dla danych ciągłych (musi byc tylko kilka klas)
handles, labels = CC.legend_elements(prop="sizes", alpha=0.1)
legend2 = ax.legend(handles, labels, loc="upper left", title="Sizes")

## sztuczka żeby mieć podpisy na kólkach
for i, txt in enumerate(df10['Country']):
    ax.annotate(txt, (df10['Happiness Score'][i],df10['Freedom'] [i]))

plt.show()  
 

Diabetes

In [16]:
df2= pd.read_csv('c:/1/diabetes.csv')
df2.head(2)
Out[16]:
  Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
 

Adds BMI indicator amplifier

In [17]:
df2['BMI_class'] = ((pd.qcut(df2['BMI'],5, labels=False).astype(int))+1)*70
In [18]:
fig = plt.figure(figsize=(14, 7), dpi= 280, facecolor='white', edgecolor='black')    
plt.scatter('Age', 'Glucose', data=df2, s='BMI_class', c='BloodPressure', cmap='YlOrBr', edgecolors='blue', linewidths=0.8)
plt.title("Bubble Plot of Diabetesn color: BloodPressure & size: BMI", fontsize=16)
plt.xlabel('Age', fontsize=18)
plt.ylabel('Glucose', fontsize=18)
plt.colorbar()
plt.show()     

Artykuł Perfect Plots: Bubble Plot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Subplots https://sigmaquality.pl/data-plots/perfect-plots_-subplots/ Tue, 05 Nov 2019 19:48:00 +0000 http://sigmaquality.pl/perfect-plots_-subplots/ Feel free to read the code on GitHub In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np   Economics In [2]: [...]

Artykuł Perfect Plots: Subplots pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub

In [1]:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
 

Economics

In [2]:
df=pd.read_csv('c:/1/economics.txt')
df.head()
Out[2]:
  date pce pop psavert uempmed unemploy
0 1967-07-01 507.4 198712 12.5 4.5 2944
1 1967-08-01 510.5 198911 12.5 4.7 2945
2 1967-09-01 516.3 199113 11.7 4.6 2958
3 1967-10-01 512.9 199311 12.5 4.9 3143
4 1967-11-01 518.1 199498 12.5 4.7 3066
In [3]:
df.dtypes
Out[3]:
date         object
pce         float64
pop           int64
psavert     float64
uempmed     float64
unemploy      int64
dtype: object
In [4]:
x = df['date']
y1 = df['psavert']
y2 = df['unemploy']
In [6]:
# Plot Line1 (Left Y Axis)
fig, ax1 = plt.subplots(1,1,figsize=(16,9), dpi= 80)
ax1.plot(x, y1, color='tab:red')

# Plot Line2 (Right Y Axis)
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
ax2.plot(x, y2, color='tab:blue')



# Decorations
# ax1 (left Y axis)
ax1.set_xlabel('Year', fontsize=20)
ax1.tick_params(axis='x', rotation=0, labelsize=12)
ax1.set_ylabel('Personal Savings Rate', color='tab:red', fontsize=20)
ax1.tick_params(axis='y', rotation=0, labelcolor='tab:red' )
ax1.grid(alpha=.4)

# ax2 (right Y axis)
ax2.set_ylabel("# Unemployed (1000's)", color='tab:blue', fontsize=20)
ax2.tick_params(axis='y', labelcolor='tab:blue')
ax2.set_xticks(np.arange(0, len(x), 60))
ax2.set_xticklabels(x[::60], rotation=90, fontdict={'fontsize':10})
ax2.set_title("Personal Savings Rate vs Unemployed: Plotting in Secondary Y Axis", fontsize=22)
fig.tight_layout()
plt.show()
In [52]:
x = df['date']
AA = df['psavert']
BB = df['unemploy']
In [53]:
from matplotlib import rc
rc('mathtext', default='regular')

ax = fig.add_subplot(111)
ax.plot(x, AA, '-', label = 'Year')

# Plot Line1 (Left Y Axis)
fig, ax1 = plt.subplots(1,1,figsize=(8,4), dpi= 280)
ax1.plot(x, AA, color='red', alpha=0.4)

# Plot Line2 (Right Y Axis)
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
ax2.plot(x, BB, color='grey', alpha=0.4)


# Decorations
# ax1 (left Y axis)
ax1.set_xlabel('Year', fontsize=20)
ax1.tick_params(axis='x', rotation=90, labelsize=8)
ax1.set_ylabel('Personal Savings Rate', color='red', fontsize=12, alpha=0.4)
ax1.tick_params(axis='y', rotation=0, labelcolor='red' )
ax1.grid(alpha=.4)
ax1.set_ylim(0,18)

# ax2 (right Y axis)
ax2.set_ylabel("Perceptions of corruption", color='grey', fontsize=12, alpha=0.4)
ax2.tick_params(axis='y', labelcolor='grey')
ax2.set_xticks(np.arange(0, len(x), 60))
ax2.set_xticklabels(x[::60], rotation=90, fontdict={'fontsize':10})
ax2.set_title("Iraq 2008-2017", fontsize=15, alpha=0.4)
fig.tight_layout()
ax2.set_ylim(0, 16000)

plt.show()
 
 

WorldHappinessReport_2005-2019

In [7]:
df2 = pd.read_csv('c:/1/WorldHappinessReport_2005-2019.csv')
IRQ = df2[df2['Country name']=='Iraq']
IRQ
#df2['Country name'].value_counts()
Out[7]:
  Unnamed: 0 Country name Year Life Ladder Log GDP per capita Social support Healthy life expectancy at birth Freedom to make life choices Generosity Perceptions of corruption GINI index (World Bank estimate) GINI index (World Bank estimate), average 2000-16 gini of household income reported in Gallup, by wp5-year Most people can be trusted, Gallup Most people can be trusted, WVS round 1981-1984 Most people can be trusted, WVS round 1989-1993 Most people can be trusted, WVS round 1994-1998 Most people can be trusted, WVS round 1999-2004 Most people can be trusted, WVS round 2005-2009 Most people can be trusted, WVS round 2010-2014
678 678 Iraq 2008 4.589845 9.410621 0.744366 58.320000 0.385769 -0.097140 0.909882 NaN 0.2905 NaN NaN NaN NaN NaN 0.464239 0.39137 0.3
679 679 Iraq 2009 4.775317 9.417306 0.861746 58.959999 0.431468 -0.234837 0.854340 NaN 0.2905 0.330139 0.160026 NaN NaN NaN 0.464239 0.39137 0.3
680 680 Iraq 2010 5.065462 9.450742 0.854118 59.599998 0.419064 -0.159973 0.858735 NaN 0.2905 0.279507 NaN NaN NaN NaN 0.464239 0.39137 0.3
681 681 Iraq 2011 4.725366 9.492628 0.750749 59.360001 0.347414 -0.105158 0.780027 NaN 0.2905 0.264834 NaN NaN NaN NaN 0.464239 0.39137 0.3
682 682 Iraq 2012 4.659509 9.590554 0.730118 59.119999 0.314565 -0.056443 0.789191 0.295 0.2905 0.337229 NaN NaN NaN NaN 0.464239 0.39137 0.3
683 683 Iraq 2013 4.725017 9.630601 0.728285 58.880001 NaN -0.086607 0.709726 NaN 0.2905 0.447405 NaN NaN NaN NaN 0.464239 0.39137 0.3
684 684 Iraq 2014 4.541502 9.604973 0.725151 58.639999 0.646007 -0.038037 0.726008 NaN 0.2905 0.570643 NaN NaN NaN NaN 0.464239 0.39137 0.3
685 685 Iraq 2015 4.493377 9.620651 0.684435 58.400002 0.599460 -0.019349 0.762167 NaN 0.2905 0.631784 NaN NaN NaN NaN 0.464239 0.39137 0.3
686 686 Iraq 2016 4.412537 9.695360 0.718957 59.000000 0.666160 -0.088390 0.798866 NaN 0.2905 0.548373 NaN NaN NaN NaN 0.464239 0.39137 0.3
687 687 Iraq 2017 4.462399 9.659120 0.695109 59.599998 0.627722 -0.037715 0.757109 NaN 0.2905 0.550368 NaN NaN NaN NaN 0.464239 0.39137 0.3

10 rows × 27 columns

 

I fill in the data gaps

In [8]:
IRQ['Freedom to make life choices'].fillna(method='ffill', inplace=True)
IRQ['Freedom to make life choices']
C:ProgramDataAnaconda3libsite-packagespandascoregeneric.py:6130: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
Out[8]:
678    0.385769
679    0.431468
680    0.419064
681    0.347414
682    0.314565
683    0.314565
684    0.646007
685    0.599460
686    0.666160
687    0.627722
Name: Freedom to make life choices, dtype: float64
In [46]:
x = IRQ['Year']
y1 = IRQ['Freedom to make life choices']
y2 = IRQ['Perceptions of corruption']
In [50]:
# Plot Line1 (Left Y Axis)
fig, ax1 = plt.subplots(1,1,figsize=(8,4), dpi= 280)
ax1.plot(x, y1,'rs-',color='red')

# Plot Line2 (Right Y Axis)
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis
ax2.plot(x, y2,'go-', color='blue')


# Decorations
# ax1 (left Y axis)
ax1.set_xlabel('Year', fontsize=20)
ax1.tick_params(axis='x', rotation=0, labelsize=9)
ax1.set_ylabel('Freedom to make life choices', color='red', fontsize=12)
ax1.tick_params(axis='y', rotation=0, labelcolor='red' )
ax1.grid(alpha=.4)

# ax2 (right Y axis)
ax2.set_ylabel("Perceptions of corruption", color='blue', fontsize=12)
ax2.tick_params(axis='y', labelcolor='blue')
ax2.set_xticks(np.arange(0, len(x), 60))
ax2.set_xticklabels(x[::60], rotation=0, fontdict={'fontsize':10})
ax2.set_title("Iraq 2008-2017", fontsize=22)
fig.tight_layout()


plt.show()
 

ufo reports

In [11]:
df4 = pd.read_csv('c:/1/uforeports.csv')
df4.head()
Out[11]:
  Unnamed: 0 City Colors Reported Shape Reported State Time
0 0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 3 Abilene NaN DISK KS 6/1/1931 13:00
4 4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00
In [12]:
df4['Time'].isnull().sum()
Out[12]:
0
In [13]:
df4['Time'] = pd.to_datetime(df4.Time)
df4['Time'].head(3)
Out[13]:
0   1930-06-01 22:00:00
1   1930-06-30 20:00:00
2   1931-02-15 14:00:00
Name: Time, dtype: datetime64[ns]
In [14]:
df4['Year'] = df4['Time'].dt.year
df4['Year'].head()
Out[14]:
0    1930
1    1930
2    1931
3    1931
4    1933
Name: Year, dtype: int64
In [15]:
circle = ['DISK', 'CIRCLE', 'SPHERE', 'FIREBALL', 'OVAL', 'TEARDROP']
nocircle = ['TRIANGLE','RECTANGLE','DIAMOND','CHEVRON',  ]

CI = df4[df4['Shape Reported'].isin(circle)]
NOCI = df4[df4['Shape Reported'].isin(nocircle)]
In [16]:
CII = CI.pivot_table(index='Year', values='Time', aggfunc='count').reset_index().set_index('Year')
CII.rename(columns = {'Time': 'Circle'}, inplace=True)





NOCII = NOCI.pivot_table(index='Year', values='Time', aggfunc='count').reset_index().set_index('Year')
NOCII.rename(columns = {'Time': 'NO-Circle'}, inplace=True)
NOCII.head(3)
Out[16]:
  NO-Circle
Year  
1930 1
1942 1
1943 1
In [17]:
result3 = pd.concat([CII, NOCII], axis=1, sort=False)
In [18]:
result3.head(4)
Out[18]:
  Circle NO-Circle
Year    
1930 NaN 1.0
1931 2.0 NaN
1934 2.0 NaN
1935 2.0 NaN
In [19]:
x = result3.index
y1 = result3['Circle']
y2 = result3['NO-Circle']
In [20]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')

fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x, y1, '-', label = 'Swdown')

ax2 = ax.twinx()
ax2.plot(x, y2, '-r', label = 'temp')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("Time (h)")
ax.set_ylabel(r"Cyrkle")
ax2.set_ylabel(r"NO-Circle")
ax2.set_ylim(0, 800)
ax.set_ylim(0,800)
plt.show()
 

Shenyang air pollution

Source: https://zhuanlan.zhihu.com/p/61513436

In [25]:
import pandas as pd
import seaborn as sns
import matplotlib as plt

df7 = pd.read_csv('c:/8/ShenyangPM20100101_20151231.csv')
df7.head(3)
Out[25]:
  No year month day hour season PM_Taiyuanjie PM_US Post PM_Xiaoheyan DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec
0 1 2010 1 1 0 4 NaN NaN NaN -26.0 69.79 1024.0 -22.0 NE 1.0289 NaN NaN
1 2 2010 1 1 1 4 NaN NaN NaN -26.0 76.26 1024.0 -23.0 NE 2.5722 NaN NaN
2 3 2010 1 1 2 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 5.1444 NaN NaN
In [26]:
df7.shape
Out[26]:
(52584, 17)
In [27]:
df.reset_index(inplace=True)
df7.index
Out[27]:
RangeIndex(start=0, stop=52584, step=1)
In [28]:
#from datetime import datetime

#df7['Date'] = df7.apply(lambda row: datetime(row['year'], row['month'], row['day']), axis=1)
In [29]:
df7.head()
Out[29]:
  No year month day hour season PM_Taiyuanjie PM_US Post PM_Xiaoheyan DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec
0 1 2010 1 1 0 4 NaN NaN NaN -26.0 69.79 1024.0 -22.0 NE 1.0289 NaN NaN
1 2 2010 1 1 1 4 NaN NaN NaN -26.0 76.26 1024.0 -23.0 NE 2.5722 NaN NaN
2 3 2010 1 1 2 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 5.1444 NaN NaN
3 4 2010 1 1 3 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 7.7166 NaN NaN
4 5 2010 1 1 4 4 NaN NaN NaN -27.0 69.56 1022.0 -23.0 NE 9.7744 NaN NaN
In [30]:
df7['Date2'] = pd.to_datetime(df7[['year','month','day']])
df7.head()
Out[30]:
  No year month day hour season PM_Taiyuanjie PM_US Post PM_Xiaoheyan DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec Date2
0 1 2010 1 1 0 4 NaN NaN NaN -26.0 69.79 1024.0 -22.0 NE 1.0289 NaN NaN 2010-01-01
1 2 2010 1 1 1 4 NaN NaN NaN -26.0 76.26 1024.0 -23.0 NE 2.5722 NaN NaN 2010-01-01
2 3 2010 1 1 2 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 5.1444 NaN NaN 2010-01-01
3 4 2010 1 1 3 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 7.7166 NaN NaN 2010-01-01
4 5 2010 1 1 4 4 NaN NaN NaN -27.0 69.56 1022.0 -23.0 NE 9.7744 NaN NaN 2010-01-01
In [31]:
#df7.reset_index()
df7.set_index('Date2')
Out[31]:
  No year month day hour season PM_Taiyuanjie PM_US Post PM_Xiaoheyan DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec
Date2                                  
2010-01-01 1 2010 1 1 0 4 NaN NaN NaN -26.0 69.79 1024.0 -22.0 NE 1.0289 NaN NaN
2010-01-01 2 2010 1 1 1 4 NaN NaN NaN -26.0 76.26 1024.0 -23.0 NE 2.5722 NaN NaN
2010-01-01 3 2010 1 1 2 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 5.1444 NaN NaN
2010-01-01 4 2010 1 1 3 4 NaN NaN NaN -27.0 69.56 1023.0 -23.0 NE 7.7166 NaN NaN
2010-01-01 5 2010 1 1 4 4 NaN NaN NaN -27.0 69.56 1022.0 -23.0 NE 9.7744 NaN NaN
2010-01-01 6 2010 1 1 5 4 NaN NaN NaN -26.0 76.26 1022.0 -23.0 NE 11.8322 NaN NaN
2010-01-01 7 2010 1 1 6 4 NaN NaN NaN -25.0 76.46 1021.0 -22.0 NE 14.4044 NaN NaN
2010-01-01 8 2010 1 1 7 4 NaN NaN NaN -24.0 70.26 1021.0 -20.0 NE 16.9766 NaN NaN
2010-01-01 9 2010 1 1 8 4 NaN NaN NaN -23.0 70.49 1021.0 -19.0 NE 19.0344 NaN NaN
2010-01-01 10 2010 1 1 9 4 NaN NaN NaN -22.0 70.71 1021.0 -18.0 NE 21.6066 NaN NaN
2010-01-01 11 2010 1 1 10 4 NaN NaN NaN -20.0 77.39 1022.0 -17.0 NE 24.1788 NaN NaN
2010-01-01 12 2010 1 1 11 4 NaN NaN NaN -18.0 77.75 1021.0 -15.0 NE 27.2655 NaN NaN
2010-01-01 13 2010 1 1 12 4 NaN NaN NaN -17.0 77.92 1020.0 -14.0 NE 29.8377 NaN NaN
2010-01-01 14 2010 1 1 13 4 NaN NaN NaN -16.0 78.10 1019.0 -13.0 NE 32.9244 NaN NaN
2010-01-01 15 2010 1 1 14 4 NaN NaN NaN -15.0 84.87 1019.0 -13.0 NE 35.4966 NaN NaN
2010-01-01 16 2010 1 1 15 4 NaN NaN NaN -15.0 78.27 1019.0 -12.0 NE 38.5833 NaN NaN
2010-01-01 17 2010 1 1 16 4 NaN NaN NaN -15.0 78.27 1019.0 -12.0 NE 41.1555 NaN NaN
2010-01-01 18 2010 1 1 17 4 NaN NaN NaN -15.0 78.27 1020.0 -12.0 NE 43.2133 NaN NaN
2010-01-01 19 2010 1 1 18 4 NaN NaN NaN -16.0 78.10 1020.0 -13.0 NE 45.7855 NaN NaN
2010-01-01 20 2010 1 1 19 4 NaN NaN NaN -17.0 77.92 1021.0 -14.0 NE 48.3577 NaN NaN
2010-01-01 21 2010 1 1 20 4 NaN NaN NaN -17.0 84.62 1021.0 -15.0 NE 50.4155 NaN NaN
2010-01-01 22 2010 1 1 21 4 NaN NaN NaN -19.0 77.57 1022.0 -16.0 NE 51.9588 NaN NaN
2010-01-01 23 2010 1 1 22 4 NaN NaN NaN -20.0 77.39 1022.0 -17.0 NE 53.5021 NaN NaN
2010-01-01 24 2010 1 1 23 4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2010-01-02 25 2010 1 2 0 4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2010-01-02 26 2010 1 2 1 4 NaN NaN NaN -23.0 70.49 1023.0 -19.0 NE 1.5433 NaN NaN
2010-01-02 27 2010 1 2 2 4 NaN NaN NaN -24.0 70.26 1023.0 -20.0 NE 2.5722 NaN NaN
2010-01-02 28 2010 1 2 3 4 NaN NaN NaN -24.0 76.65 1023.0 -21.0 NE 3.6011 NaN NaN
2010-01-02 29 2010 1 2 4 4 NaN NaN NaN -24.0 76.65 1024.0 -21.0 NE 4.6300 NaN NaN
2010-01-02 30 2010 1 2 5 4 NaN NaN NaN -24.0 70.26 1024.0 -20.0 NE 6.1733 NaN NaN
2015-12-30 52555 2015 12 30 18 4 190.0 204.0 207.0 -9.0 73.45 1030.0 -5.0 NW 28.0000 0.0 0.0
2015-12-30 52556 2015 12 30 19 4 198.0 209.0 213.0 -10.0 67.88 1031.0 -5.0 NW 32.0000 0.0 0.0
2015-12-30 52557 2015 12 30 20 4 202.0 208.0 222.0 -11.0 67.65 1031.0 -6.0 NW 35.0000 0.0 0.0
2015-12-30 52558 2015 12 30 21 4 192.0 211.0 227.0 -11.0 73.05 1032.0 -7.0 NW 37.0000 0.0 0.0
2015-12-30 52559 2015 12 30 22 4 186.0 206.0 202.0 -11.0 78.94 1032.0 -8.0 NE 1.0000 0.0 0.0
2015-12-30 52560 2015 12 30 23 4 183.0 206.0 192.0 -13.0 78.61 1032.0 -10.0 cv 1.0000 0.0 0.0
2015-12-31 52561 2015 12 31 0 4 167.0 196.0 206.0 -13.0 92.22 1032.0 -12.0 SW 1.0000 0.0 0.0
2015-12-31 52562 2015 12 31 1 4 171.0 185.0 224.0 -13.0 92.22 1032.0 -12.0 cv 1.0000 0.0 0.0
2015-12-31 52563 2015 12 31 2 4 193.0 188.0 253.0 -14.0 84.99 1033.0 -12.0 SE 2.0000 0.0 0.0
2015-12-31 52564 2015 12 31 3 4 214.0 199.0 302.0 -15.0 84.87 1032.0 -13.0 SE 4.0000 0.0 0.0
2015-12-31 52565 2015 12 31 4 4 258.0 260.0 304.0 -15.0 92.09 1032.0 -14.0 SE 6.0000 0.0 0.0
2015-12-31 52566 2015 12 31 5 4 287.0 290.0 295.0 -17.0 84.62 1033.0 -15.0 SW 1.0000 0.0 0.0
2015-12-31 52567 2015 12 31 6 4 305.0 323.0 316.0 -16.0 92.02 1033.0 -15.0 cv 1.0000 0.0 0.0
2015-12-31 52568 2015 12 31 7 4 312.0 352.0 328.0 -17.0 91.95 1033.0 -16.0 SE 1.0000 0.0 0.0
2015-12-31 52569 2015 12 31 8 4 281.0 338.0 380.0 -17.0 84.62 1034.0 -15.0 cv 0.0000 0.0 0.0
2015-12-31 52570 2015 12 31 9 4 351.0 368.0 365.0 -13.0 92.22 1034.0 -12.0 SE 2.0000 0.0 0.0
2015-12-31 52571 2015 12 31 10 4 362.0 435.0 368.0 -11.0 85.35 1034.0 -9.0 SE 4.0000 0.0 0.0
2015-12-31 52572 2015 12 31 11 4 212.0 391.0 351.0 -9.0 79.26 1033.0 -6.0 SW 2.0000 0.0 0.0
2015-12-31 52573 2015 12 31 12 4 149.0 279.0 187.0 -8.0 79.42 1033.0 -5.0 SW 4.0000 0.0 0.0
2015-12-31 52574 2015 12 31 13 4 159.0 199.0 142.0 -9.0 63.20 1032.0 -3.0 SW 6.0000 0.0 0.0
2015-12-31 52575 2015 12 31 14 4 148.0 180.0 117.0 -10.0 58.41 1032.0 -3.0 SW 8.0000 0.0 0.0
2015-12-31 52576 2015 12 31 15 4 148.0 152.0 118.0 -10.0 54.23 1031.0 -2.0 SW 10.0000 0.0 0.0
2015-12-31 52577 2015 12 31 16 4 134.0 171.0 124.0 -10.0 58.41 1031.0 -3.0 SE 1.0000 0.0 0.0
2015-12-31 52578 2015 12 31 17 4 124.0 163.0 204.0 -9.0 68.11 1030.0 -4.0 SE 3.0000 0.0 0.0
2015-12-31 52579 2015 12 31 18 4 148.0 157.0 331.0 -11.0 78.94 1031.0 -8.0 cv 0.0000 0.0 0.0
2015-12-31 52580 2015 12 31 19 4 162.0 166.0 435.0 -10.0 92.42 1031.0 -9.0 SE 2.0000 0.0 0.0
2015-12-31 52581 2015 12 31 20 4 255.0 259.0 429.0 -10.0 79.10 1030.0 -7.0 SE 5.0000 0.0 0.0
2015-12-31 52582 2015 12 31 21 4 266.0 368.0 361.0 -10.0 79.10 1030.0 -7.0 SE 8.0000 0.0 0.0
2015-12-31 52583 2015 12 31 22 4 202.0 319.0 342.0 -10.0 79.10 1028.0 -7.0 SE 11.0000 NaN NaN
2015-12-31 52584 2015 12 31 23 4 NaN 275.0 NaN -9.0 79.26 1028.0 -6.0 SE 12.0000 0.0 0.0

52584 rows × 17 columns

In [32]:
df7.sample(8)
Out[32]:
  No year month day hour season PM_Taiyuanjie PM_US Post PM_Xiaoheyan DEWP HUMI PRES TEMP cbwd Iws precipitation Iprec Date2
21742 21743 2012 6 24 22 2 NaN NaN NaN 20.0 88.45 1006.0 22.0 SE 2.0 NaN NaN 2012-06-24
36038 36039 2014 2 10 14 4 41.0 45.0 46.0 -23.0 24.59 1033.0 -6.0 NE 82.0 0.0 0.0 2014-02-10
30139 30140 2013 6 9 19 2 11.0 5.0 9.0 11.0 46.78 1009.0 23.0 NW 23.0 0.0 0.0 2013-06-09
14929 14930 2011 9 15 1 3 NaN NaN NaN 17.0 93.89 1010.0 18.0 NE 2.0 0.0 0.0 2011-09-15
26707 26708 2013 1 17 19 4 89.0 NaN 188.0 -20.0 71.16 1036.0 -16.0 SE 3.0 0.0 0.0 2013-01-17
15165 15166 2011 9 24 21 3 NaN NaN NaN 9.0 59.29 1014.0 17.0 SE 3.0 0.0 0.0 2011-09-24
23018 23019 2012 8 17 2 2 NaN NaN NaN 19.0 93.98 1010.0 20.0 SE 12.0 0.0 0.0 2012-08-17
35752 35753 2014 1 29 16 4 68.0 44.0 46.0 -8.0 44.19 1008.0 3.0 SW 87.0 0.0 0.0 2014-01-29
In [33]:
PKP = df7.pivot_table(index='Date2', values=['TEMP','PRES'], aggfunc='mean').reset_index()
PKP.head()
Out[33]:
  Date2 PRES TEMP
0 2010-01-01 1021.173913 -17.347826
1 2010-01-02 1026.173913 -20.086957
2 2010-01-03 1026.833333 -21.416667
3 2010-01-04 1024.375000 -18.833333
4 2010-01-05 1029.375000 -20.416667
In [34]:
PKP.dtypes
Out[34]:
Date2    datetime64[ns]
PRES            float64
TEMP            float64
dtype: object
In [35]:
x = PKP['Date2']
y1 = PKP['PRES']
y2 = PKP['TEMP']
In [36]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')

fig = plt.figure(figsize=(8,4), dpi= 280)
ax = fig.add_subplot(111)
ax.plot(x, y1, '-', label = 'Pressure')

ax2 = ax.twinx()
ax2.plot(x, y2, '-r', label = 'temp')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("Days")
ax.set_ylabel(r"Pressure")
ax2.set_ylabel(r"Temperature")
#ax2.set_ylim(0, 800)
#ax.set_ylim(0,800)
plt.show()
C:ProgramDataAnaconda3libsite-packagespandasplotting_converter.py:129: FutureWarning: Using an implicitly registered datetime converter for a matplotlib plotting method. The converter was registered by pandas on import. Future versions of pandas will require you to explicitly register matplotlib converters.

To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()
  warnings.warn(msg, FutureWarning)
In [37]:
PKP2 = PKP[PKP['Date2'].dt.year==2011]
In [38]:
x = PKP2['Date2']
y1 = PKP2['PRES']
y2 = PKP2['TEMP']
In [39]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')

fig = plt.figure(figsize=(8,4), dpi= 280)
ax = fig.add_subplot(111)
ax.plot(x, y1, '-', label = 'Pressure')

ax2 = ax.twinx()
ax2.plot(x, y2, '-r', label = 'temp')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("Days")
ax.set_ylabel(r"Pressure")
ax2.set_ylabel(r"Temperature")
#ax2.set_ylim(0, 800)
#ax.set_ylim(0,800)
plt.show()
In [40]:
PRL = df7[df7['Date2'].dt.year==2015]

PRL2 = PRL.pivot_table(index='Date2', values=['TEMP','PM_Xiaoheyan'], aggfunc='mean').reset_index()
PRL2.head()
Out[40]:
  Date2 PM_Xiaoheyan TEMP
0 2015-01-01 30.000000 -13.208333
1 2015-01-02 44.375000 -12.416667
2 2015-01-03 57.333333 -4.208333
3 2015-01-04 90.076923 -5.000000
4 2015-01-05 81.125000 -3.833333
In [41]:
x = PRL2['Date2']
y1 = PRL2['PM_Xiaoheyan']
y2 = PRL2['TEMP']
In [42]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')

fig = plt.figure(figsize=(8,4), dpi= 280)
ax = fig.add_subplot(111)
ax.plot(x, y1, '-', label = 'PM_Xiaoheyan', color='black')

ax2 = ax.twinx()
ax2.plot(x, y2, '-r', label = 'temp', color='red')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("Days")
ax.set_ylabel(r"PM_Xiaoheyan")
ax2.set_ylabel(r"Temperature")
#ax2.set_ylim(0, 800)
ax.set_ylim(0,300)
plt.show()
In [43]:
x = PRL2['Date2']
y1 = PRL2['PM_Xiaoheyan']
y2 = PRL2['TEMP']
In [44]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('mathtext', default='regular')

fig = plt.figure(figsize=(8,4), dpi= 280)
ax = fig.add_subplot(111)
ax.plot(x, y1, '-', label = 'PM_Xiaoheyan',color='grey')
ax2 = ax.twinx()
ax2.plot(x, y2, '--', label = 'temp',color='red')
ax.legend(loc=0)
ax.grid()
ax.set_xlabel("months")
ax.set_ylabel(r"PM_Xiaoheyan")
ax2.set_ylabel(r"Temperature")
#ax2.set_ylim(0, 100)
#ax.set_ylim(-500,300)
plt.show()

Artykuł Perfect Plots: Subplots pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Individuals Control Chart I-MR https://sigmaquality.pl/data-plots/perfect-plots_-individuals-control-chart-i-mr/ Tue, 05 Nov 2019 19:29:00 +0000 http://sigmaquality.pl/perfect-plots_-individuals-control-chart-i-mr/ Energy Source of data: https://github.com/pyviz/holoviews/blob/master/examples/assets/energy.csv In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np In [2]: df=pd.read_csv('c:/2/Energy.csv') df.head() Out[2]: Unnamed: 0 Date [...]

Artykuł Perfect Plots: Individuals Control Chart I-MR pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>

Energy

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [2]:
df=pd.read_csv('c:/2/Energy.csv')
df.head()
Out[2]:
Unnamed: 0 Date Consumption Wind Solar Wind+Solar
0 0 2006-01-01 1069.184 NaN NaN NaN
1 1 2006-01-02 1380.521 NaN NaN NaN
2 2 2006-01-03 1442.533 NaN NaN NaN
3 3 2006-01-04 1457.217 NaN NaN NaN
4 4 2006-01-05 1477.131 NaN NaN NaN
In [3]:
DDT = df[['Date','Consumption']]
print(DDT.head())
         Date  Consumption
0  2006-01-01     1069.184
1  2006-01-02     1380.521
2  2006-01-03     1442.533
3  2006-01-04     1457.217
4  2006-01-05     1477.131

Moving Range (R) Stretch marks means differences between the values of subsequent quotations

In [4]:
DDT['Cons_diff'] = DDT['Consumption'].diff()  # rozstępy między pomiarami
DDT['Cons_diff'] = abs(DDT['Cons_diff'])   # wartość bezwzględna dla liczby
In [5]:
DDT.head()
Out[5]:
Date Consumption Cons_diff
0 2006-01-01 1069.184 NaN
1 2006-01-02 1380.521 311.337
2 2006-01-03 1442.533 62.012
3 2006-01-04 1457.217 14.684
4 2006-01-05 1477.131 19.914

The average moving range and the average of the last 200 measurements

In [6]:
X_men = DDT['Consumption'].tail(200).mean(axis=0)
X_men
Out[6]:
1362.0095919500009
In [7]:
R_men = DDT['Cons_diff'].tail(200).mean(axis=0)
R_men
Out[7]:
100.07229509999998
In [8]:
DDT['X_men'] = DDT['Consumption'].tail(200).mean(axis=0)
DDT['R_men'] = DDT['Cons_diff'].tail(200).mean(axis=0)
DDT['UCLr'] = 3.27 * DDT['R_men']
DDT['UCLx'] = DDT['X_men'] + (2.66*DDT['R_men'])
DDT['LCLx'] = DDT['X_men'] - (2.66*DDT['R_men'])
DDT.head()
 
Out[8]:
Date Consumption Cons_diff X_men R_men UCLr UCLx LCLx
0 2006-01-01 1069.184 NaN 1362.009592 100.072295 327.236405 1628.201897 1095.817287
1 2006-01-02 1380.521 311.337 1362.009592 100.072295 327.236405 1628.201897 1095.817287
2 2006-01-03 1442.533 62.012 1362.009592 100.072295 327.236405 1628.201897 1095.817287
3 2006-01-04 1457.217 14.684 1362.009592 100.072295 327.236405 1628.201897 1095.817287
4 2006-01-05 1477.131 19.914 1362.009592 100.072295 327.236405 1628.201897 1095.817287

Standard deviation for the process

In [9]:

DDT['std_T'] = DDT['R_men']/1.128
DDT.tail(2)
Out[9]:
Date Consumption Cons_diff X_men R_men UCLr UCLx LCLx std_T
4381 2017-12-30 1215.44897 79.63856 1362.009592 100.072295 327.236405 1628.201897 1095.817287 88.716574
4382 2017-12-31 1107.11488 108.33409 1362.009592 100.072295 327.236405 1628.201897 1095.817287 88.716574

Last 30 measurements

In [10]:

ZZT = DDT.tail(60)
In [11]:
ZZT['Date'] = pd.to_datetime(ZZT['Date'])
ZZT.dtypes
Out[11]:
Date           datetime64[ns]
Consumption           float64
Cons_diff             float64
X_men                 float64
R_men                 float64
UCLr                  float64
UCLx                  float64
LCLx                  float64
std_T                 float64
dtype: object

Individuals Control Chart I-MR

In [12]:
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4,figsize=(18, 8))

grid = plt.GridSpec(3, 4, wspace=0.4, hspace=0.5)
ax1 = plt.subplot(grid[0, 0])
ax2 = plt.subplot(grid[0, 1:])
ax3 = plt.subplot(grid[1, :1])
ax4 = plt.subplot(grid[1, 1:])


#ax1.plot(ZZT['Consumption'],color='green', alpha=0.8)
#ax1.set_title('Freedom in the world', color='darkblue')
#ax1.set_xlabel('jakaś zmienna')
#ax1.set_ylabel('ogólnie wykres bez sensu', color='brown')

ax2.set_title('Control Chart I-MR', color='darkblue')
ax2.plot(ZZT['X_men'],color='green', alpha=0.8)
ax2.plot(ZZT['UCLx'],color='red', alpha=0.8)
ax2.plot(ZZT['LCLx'],color='red', alpha=0.8)
ax2.plot(ZZT['Consumption'],color='blue', alpha=0.8)
ax2.legend(loc=(0.65, 0.8))
ax2.grid()

#ax3.scatter(ZZT['couns_a_diff'],df['Freedom'],color='blue', alpha=0.8)
ax3.scatter(ZZT['Date'],ZZT['Consumption'],color='red', alpha=0.8)
#ax3.legend(loc=(0.65, 0.8))

ax4.set_title('Moving Range (R)', color='darkblue')
ax4.plot(ZZT['UCLr'],color='brown', alpha=0.8)
ax4.plot( ZZT['Cons_diff'], color='black', alpha=0.8)
ax4.grid()
In [13]:
dk = pd.read_csv('c:/11/ABC.txt')
dk
Out[13]:
nr point
0 1 3.5
1 2 2.4
2 3 4.1
3 4 2.8
4 5 3.0
5 6 4.7
6 7 1.2
7 8 0.9
8 9 2.5
9 10 3.1
10 11 3.6
11 12 4.1
12 13 3.8
13 14 2.5
14 15 2.8
15 16 4.3
16 17 4.1
17 18 3.6
18 19 2.4
19 20 4.8
20 21 3.5
21 22 2.5
22 23 1.3
23 24 4.5

Moving Range (R)

In [14]:
dk['point_diff'] = dk['point'].diff()  # rozstępy między pomiarami
dk['point_diff'] = abs(dk['point_diff'])   # wartość bezwzględna dla liczby

Mean and Range (R)

In [15]:
dk['X_men'] = dk['point'].tail(200).mean(axis=0)
dk['R_men'] = dk['point_diff'].tail(200).mean(axis=0)
dk['UCLr'] = 3.27 * dk['R_men']
dk['UCLx'] = dk['X_men'] + (2.66*dk['R_men'])
dk['LCLx'] = dk['X_men'] - (2.66*dk['R_men'])
dk.head()
Out[15]:
nr point point_diff X_men R_men UCLr UCLx LCLx
0 1 3.5 NaN 3.166667 1.191304 3.895565 6.335536 -0.002203
1 2 2.4 1.1 3.166667 1.191304 3.895565 6.335536 -0.002203
2 3 4.1 1.7 3.166667 1.191304 3.895565 6.335536 -0.002203
3 4 2.8 1.3 3.166667 1.191304 3.895565 6.335536 -0.002203
4 5 3.0 0.2 3.166667 1.191304 3.895565 6.335536 -0.002203

Control Chart I-MR

In [16]:
fig, (ax1, ax2, ax3, ax4) = plt.subplots(nrows=1, ncols=4,figsize=(20, 20))

grid = plt.GridSpec(6, 4, wspace=0.4, hspace=0.5)
ax1 = plt.subplot(grid[0, 0])
ax2 = plt.subplot(grid[0, 1:])
ax3 = plt.subplot(grid[1, :1])
ax4 = plt.subplot(grid[1, 1:])


#ax1.plot(ZZT['Consumption'],color='green', alpha=0.8)
#ax1.set_title('Freedom in the world', color='darkblue')
#ax1.set_xlabel('jakaś zmienna')
#ax1.set_ylabel('ogólnie wykres bez sensu', color='brown')

ax2.set_title('Control Chart I-MR', color='darkblue', fontsize=18)
ax2.plot(dk['X_men'],'--',color='green', alpha=0.8, label='X_men')
ax2.plot(dk['UCLx'],'-.',color='red', alpha=0.8, label='UCLx')
ax2.plot(dk['LCLx'],'-.',color='darkred', alpha=0.8, label='LCLx')
ax2.plot(dk['point'],'go-',color='blue', alpha=0.8, label='x')
#ax2.legend(loc=(-0.15, 0.5))
ax2.grid()
ax2.annotate('UCL', xy=(1,1), xytext=(0.95, 0.89), textcoords='axes fraction', fontsize=18, color='red')
ax2.annotate('LCL', xy=(1,1), xytext=(0.95, 0.07), textcoords='axes fraction', fontsize=18, color='black')
ax2.annotate('mean', xy=(1,1), xytext=(0.95, 0.5), textcoords='axes fraction', fontsize=18, color='green')


#ax3.scatter(ZZT['couns_a_diff'],df['Freedom'],color='blue', alpha=0.8)
#ax3.scatter(ZZT['Date'],ZZT['Consumption'],color='red', alpha=0.8)
#ax3.legend(loc=(0.65, 0.8))


ax4.set_title('Moving Range (R)', color='darkblue', fontsize=18)
ax4.plot(dk['UCLr'],'-.',color='red',alpha=0.8, label='UCLr')
ax4.plot(dk['R_men'],'--',color='green', alpha=0.8, label='R_men')
ax4.plot( dk['point_diff'],'rs-', color='black', alpha=0.8, label='R')
ax4.annotate('mean', xy=(1,1), xytext=(0.95, 0.3), textcoords='axes fraction', fontsize=18, color='green')
ax4.annotate('UCL', xy=(1,1), xytext=(0.95, 0.89), textcoords='axes fraction', fontsize=18, color='red')
#ax4.legend(loc=(-0.15, 0.6))
ax4.grid()

Artykuł Perfect Plots: Individuals Control Chart I-MR pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Bar plots https://sigmaquality.pl/data-plots/perfect-plots_-bar-plots/ Tue, 05 Nov 2019 19:22:00 +0000 http://sigmaquality.pl/perfect-plots_-bar-plots/ Feel free to read the code on GitHub     In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np import matplotlib.patches [...]

Artykuł Perfect Plots: Bar plots pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub

 

 
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
 

Car statistics

In [2]:
# Prepare Data
df = pd.read_csv('c:/1/mpg_ggplot2.txt')

df2 = df.pivot_table(index='manufacturer',values='model', aggfunc='count').reset_index()
df2 = df2.rename(columns={'model':'counts'})

# bardzo ważne żeby wykres był ładnie posortowany
df2.sort_values('counts', ascending=False, inplace=True)
df2.reset_index(inplace=True)
df2
Out[2]:
  index manufacturer counts
0 2 dodge 37
1 13 toyota 34
2 14 volkswagen 27
3 3 ford 25
4 1 chevrolet 19
5 0 audi 18
6 5 hyundai 14
7 12 subaru 14
8 10 nissan 13
9 4 honda 9
10 6 jeep 8
11 11 pontiac 5
12 7 land rover 4
13 9 mercury 4
14 8 lincoln 3
In [3]:
df.head(3)
Out[3]:
  manufacturer model displ year cyl trans drv cty hwy fl class
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
In [4]:
KAT = df.pivot_table(index =['hwy', 'cty'], values ='model', aggfunc='count'  ).reset_index()
KAT = KAT.rename(columns ={'model':'counts'})
KAT.head()
Out[4]:
  hwy cty counts
0 12 9 5
1 14 11 2
2 15 11 10
3 16 11 3
4 16 12 2
 

Stripplot

Size of circle is bigger as more points overlap.

In [26]:
import seaborn as sns

# Draw Stripplot
fig, ax = plt.subplots(figsize=(8,6), dpi= 280)    
sns.stripplot(KAT.cty, KAT.hwy, size=KAT.counts*2, ax=ax)

# Decorations
plt.title('Counts Plot - Size of circle is bigger as more points overlap', fontsize=12)
plt.show()
 

Plot Bars

In [6]:
# Plot Bars
plt.figure(figsize=(10,4), dpi= 280)
plt.bar(df2['manufacturer'], df2['counts'], color=['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc'], alpha=0.4, width=.5)
for i, val in enumerate(df2['counts'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
plt.gca().set_xticklabels(df2['manufacturer'], rotation=90, horizontalalignment= 'right')
plt.title("Number of Vehicles by Manaufacturers", fontsize=14)
plt.ylabel('Vehicles')
plt.ylim(0, 45)
plt.show()
 

Trigger

 

Dane do wykresu
x1 opis
x2 dane liczbowe

In [7]:
name = df2.manufacturer
x = df2.counts
In [8]:
ylabel = 'Vehicles'
title = 'Number of Vehicles by Manaufacturers'
In [9]:
def bar1(name, x, ylabel, title):
    # Plot Bars
    plt.figure(figsize=(10,4), dpi= 280)
    plt.bar(name, x, color=['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'], alpha=0.4, width=.5)
    for i, val in enumerate(df2['counts'].values):
        plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
    plt.gca().set_xticklabels(df2['manufacturer'], rotation=90, horizontalalignment= 'right')
    plt.title(title, fontsize=14)
    plt.ylabel(ylabel)
    plt.ylim(0, 45)
    plt.show()
In [10]:
bar1(name, x, ylabel, title)
 

https://yagisanatode.com/2019/08/06/google-apps-script-hexadecimal-color-codes-for-google-docs-sheets-and-slides-standart-palette/

colors = ['#274e13','#6aa84f','#93c47d', '#b6d7a8','#d9ead3','#b7b7b7','#38761d'] #green
colors = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'] #cyan
colors = ['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc'] #yelowcolors = ['#4c1130','#a64d79','#c27ba0','#d5a6bd','#ead1dc','#741b47',] #magentacolors = ['#e6b8af','#b6d7a8','#e06666','#747574','#ffd966','#ffcc99','#ea9999']
colors = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']
colors = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6'] #purple
colors = ['#cfe2f3','#9fc5e8','#6fa8dc'] #blue
colors = ['#d9ead3','#b6d7a8','#93c47d','#6aa84f']
colors = ['#ff0000','#434343','#666666','#999999','#b7b7b7','#cccccc','#d9d9d9','#efefef','#ffffff','#f3f3f3'] #=> niemieckie czasopismo
 

 

 

Airports

In [11]:
df3 = pd.read_csv('c:/1/airports.csv')
df3.head(3)
Out[11]:
  id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN
1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN
2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN
In [12]:
PPS = df3.pivot_table(index = 'type', values = 'name', aggfunc = 'count').reset_index()
PPS = PPS.rename(columns={'name':'counts'})
# bardzo ważne żeby wykres był ładnie posortowany
PPS.sort_values('counts', ascending=False, inplace=True)
PPS.reset_index(inplace=True)
PPS
Out[12]:
  index type counts
0 6 small_airport 33942
1 2 heliport 11248
2 4 medium_airport 4550
3 1 closed 3529
4 5 seaplane_base 1015
5 3 large_airport 624
6 0 balloonport 23
In [13]:
# Plot Bars
plt.figure(figsize=(8,4), dpi= 280)
plt.bar(PPS['type'], PPS['counts'], color=['#cfe2f3','#9fc5e8','#6fa8dc'], alpha=0.4, width=.5)
#numery na słupakach

for i, val in enumerate(PPS['counts'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
plt.gca().set_xticklabels(PPS['type'], rotation=90, horizontalalignment= 'right')
plt.title("Number of airfields by the type", fontsize=14)
plt.ylabel('counts')
plt.ylim(0, 37000)
plt.show()
In [27]:
import matplotlib.pyplot as plt
color=['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc']

# Draw plot
fig, ax = plt.subplots(figsize=(7,4), dpi= 280)
ax.vlines(x=PPS.index, ymin=0, ymax=PPS.counts, color=color, alpha=0.7, linewidth=2)
ax.scatter(x=PPS.index, y=PPS.counts, s=75, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Ylim
ax.set_title('Number of airfields by the type', fontdict={'size':10})
ax.set_ylabel('counts')
ax.set_xticks(PPS.index)
ax.set_xticklabels(PPS.type.str.upper(), rotation=90, fontdict={'horizontalalignment': 'right', 'size':8})
#ax.set_ylim(0, 30)

# Annotate
for row in PPS.itertuples():
    ax.text(row.Index, row.counts+.5, s=round(row.counts, 2), horizontalalignment= 'center', verticalalignment='bottom', fontsize=8)

plt.show()
In [28]:
## Draw plot
import matplotlib.patches as patches
color = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']



fig, ax = plt.subplots(figsize=(16,10), facecolor='white', dpi= 180)
ax.vlines(x=PPS.type, ymin=0, ymax=PPS.counts, color=color, alpha=0.7, linewidth=20)

# Annotate Text
for i, kot in enumerate(PPS.counts):
    ax.text(i, kot+0.5, round(kot, 1), horizontalalignment='center', fontsize=18, rotation=90)


# Title, Label, Ticks and Ylim
ax.set_title('Number of airfields by the type', fontdict={'size':22})
ax.set(ylabel='counts', ylim=(0, 40000))
#plt.xticks(PPS.type, PPS.counts.str.upper(), rotation=30, horizontalalignment='right', fontsize=12)

# Add patches to color the X axis labels
#p1 = patches.Rectangle((.57, -0.005), width=.33, height=.13, alpha=.1, facecolor='grey', transform=fig.transFigure)
#p2 = patches.Rectangle((.124, -0.005), width=.446, height=.13, alpha=.1, facecolor='yellow', transform=fig.transFigure)
#fig.add_artist(p1)
#fig.add_artist(p2)
plt.show()
 

 

 

 

World Happiness Report

In [16]:
df4 = pd.read_csv('c:/1/WorldHappinessReport.csv')
df4.head(3)# Draw plot
Out[16]:
  Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
0 0 Afghanistan Southern Asia 153.0 3.575 0.31982 0.30285 0.30335 0.23414 0.09719 0.36510 1.95210 2015.0
1 1 Albania Central and Eastern Europe 95.0 4.959 0.87867 0.80434 0.81325 0.35733 0.06413 0.14272 1.89894 2015.0
2 2 Algeria Middle East and Northern Africa 68.0 5.605 0.93929 1.07772 0.61766 0.28579 0.17383 0.07822 2.43209 2015.0
In [17]:
PKP = df4.pivot_table(index='Region', values='Happiness Score', aggfunc= 'mean' ).reset_index()
PKP['Happiness Score'] = np.round(PKP['Happiness Score'], decimals=2)

# bardzo ważne żeby wykres był ładnie posortowany
PKP.sort_values('Happiness Score', inplace=True)
PKP.reset_index(inplace=True)

PKP
Out[17]:
  index Region Happiness Score
0 8 Sub-Saharan Africa 4.15
1 7 Southern Asia 4.59
2 6 Southeastern Asia 5.36
3 1 Central and Eastern Europe 5.37
4 4 Middle East and Northern Africa 5.39
5 2 Eastern Asia 5.64
6 3 Latin America and Caribbean 6.07
7 9 Western Europe 6.69
8 5 North America 7.23
9 0 Australia and New Zealand 7.30
 

colors = [’#d9ead3′,’#b6d7a8′,’#93c47d’,’#6aa84f’]

In [29]:
# Plot Bars
plt.figure(figsize=(8,4), dpi= 280)
plt.bar(PKP['Region'], PKP['Happiness Score'], color=['#d9ead3','#b6d7a8','#93c47d','#6aa84f'], alpha=0.4, width=.9)
#numery na słupakach

for i, val in enumerate(PKP['Happiness Score'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
plt.gca().set_xticklabels(PKP['Region'], rotation=90, horizontalalignment= 'right')
plt.title("Average Happiness Rate", fontsize=14)
plt.ylabel('Rate' )
plt.ylim(0, 9)
plt.show()
In [19]:
PKP['HappS'] =PKP['Happiness Score']
In [34]:
import matplotlib.pyplot as plt
# Draw plot
fig, ax = plt.subplots(figsize=(5,3), dpi= 280)
ax.vlines(x=PKP.index, ymin=0, ymax=PKP['HappS'], color='firebrick', alpha=0.7, linewidth=2)
ax.scatter(x=PKP.index, y=PKP['HappS'], s=75, color='firebrick', alpha=0.7)

# Title, Label, Ticks and Ylim
ax.set_title('Average Happiness Rate', fontdict={'size':10})
ax.set_ylabel('Rate')
ax.set_xticks(PKP.index)
#ax.set_xticklabels(PKP.type.str.upper(), rotation=90, fontdict={'horizontalalignment': 'right', 'size':8})
ax.set_ylim(0, 9)

# Annotate
for row in PKP.itertuples():
    ax.text(row.Index, row.HappS+.5, s=round(row.HappS, 2), horizontalalignment= 'center', verticalalignment='bottom', fontsize=8)

plt.gca().set_xticklabels(PKP['Region'], rotation=90, horizontalalignment= 'right')
plt.show()
In [36]:
## Draw plot
import matplotlib.patches as patches

fig, ax = plt.subplots(figsize=(8,5), facecolor='white', dpi= 180)
ax.vlines(x=PKP.index, ymin=0, ymax=PKP['Happiness Score'], color='firebrick', alpha=0.7, linewidth=20)

# Annotate Text
for i, kot in enumerate(PKP['Happiness Score']):
    ax.text(i, kot+0.5, round(kot, 1), horizontalalignment='center', fontsize=18, rotation=0)


# Title, Label, Ticks and Ylim
ax.set_title('Average Happiness Rate', fontdict={'size':22})
ax.set(ylabel='Rate', ylim=(0, 9))
plt.xticks(PKP.index, PKP.Region.str.upper(), rotation=30, horizontalalignment='right', fontsize=12)

# Add patches to color the X axis labels
p1 = patches.Rectangle((.57, -0.005), width=.33, height=.13, alpha=.1, facecolor='green', transform=fig.transFigure)
p2 = patches.Rectangle((.124, -0.005), width=.446, height=.13, alpha=.1, facecolor='red', transform=fig.transFigure)
fig.add_artist(p1)
fig.add_artist(p2)
plt.show()

Artykuł Perfect Plots: Bar plots pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Combiplot https://sigmaquality.pl/data-plots/perfect-plots_-combiplot/ Tue, 05 Nov 2019 18:30:00 +0000 http://sigmaquality.pl/perfect-plots_-combiplot/ Feel free to read the code on GitHub Global market sales Source of data: https://github.com/vkrit/data-science-class/blob/master/WA_Fn-UseC_-Sales-Win-Loss.csv In [1]: import pandas as pd import matplotlib.pyplot as plt import [...]

Artykuł Perfect Plots: Combiplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Feel free to read the code on GitHub
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

df = pd.read_csv('c:/1/WA_Fn-UseC_-Sales-Win-Loss.csv')
df.head()
Out[1]:
Opportunity Number Supplies Subgroup Supplies Group Region Route To Market Elapsed Days In Sales Stage Opportunity Result Sales Stage Change Count Total Days Identified Through Closing Total Days Identified Through Qualified Opportunity Amount USD Client Size By Revenue Client Size By Employee Count Revenue From Client Past Two Years Competitor Type Ratio Days Identified To Total Days Ratio Days Validated To Total Days Ratio Days Qualified To Total Days Deal Size Category
0 1641984 Exterior Accessories Car Accessories Northwest Fields Sales 76 Won 13 104 101 0 5 5 0 Unknown 0.69636 0.113985 0.154215 1
1 1658010 Exterior Accessories Car Accessories Pacific Reseller 63 Loss 2 163 163 0 3 5 0 Unknown 0.00000 1.000000 0.000000 1
2 1674737 Motorcycle Parts Performance & Non-auto Pacific Reseller 24 Won 7 82 82 7750 1 1 0 Unknown 1.00000 0.000000 0.000000 1
3 1675224 Shelters & RV Performance & Non-auto Midwest Reseller 16 Loss 5 124 124 0 1 1 0 Known 1.00000 0.000000 0.000000 1
4 1689785 Exterior Accessories Car Accessories Pacific Reseller 69 Loss 11 91 13 69756 1 1 0 Unknown 0.00000 0.141125 0.000000 4
In [2]:
SKS = df.pivot_table(index='Region',values='Sales Stage Change Count', aggfunc=['sum','mean']).reset_index()
SKS
Out[2]:
Region sum mean
Sales Stage Change Count Sales Stage Change Count
0 Mid-Atlantic 23050 3.045719
1 Midwest 61569 2.929068
2 Northeast 22247 3.023512
3 Northwest 27939 2.924937
4 Pacific 42636 2.815744
5 Southeast 28278 3.063042
6 Southwest 24902 3.054336
In [3]:
SKS.columns
Out[3]:
MultiIndex(levels=[['sum', 'mean', 'Region'], ['Sales Stage Change Count', '']],
           codes=[[2, 0, 1], [1, 0, 0]])

Merge column names

In [4]:
SKS.columns = ['_'.join(col) for col in SKS.columns.values]
SKS
Out[4]:
Region_ sum_Sales Stage Change Count mean_Sales Stage Change Count
0 Mid-Atlantic 23050 3.045719
1 Midwest 61569 2.929068
2 Northeast 22247 3.023512
3 Northwest 27939 2.924937
4 Pacific 42636 2.815744
5 Southeast 28278 3.063042
6 Southwest 24902 3.054336

Combiplot

In [10]:
fig = plt.figure(figsize=(12,6), dpi= 280)

ax = SKS['mean_Sales Stage Change Count'].plot(kind="bar", color='darkgrey',alpha=0.7)
ax.set_ylim(0,1.3*SKS["mean_Sales Stage Change Count"].max())
ax.set_xlabel("Regions", color='darkgrey',alpha=0.8, fontsize=20)
ax.set_ylabel(r"Mean", color='darkgrey',alpha=0.8, fontsize=20)
ax.grid(False)
ax.tick_params(axis='y', rotation=0, labelcolor='red')
ax.tick_params(axis='x', rotation=90, labelcolor='darkblue')
ax.set_xticklabels(SKS['Region_'], rotation=90, fontdict={'fontsize':20})


ax2 = ax.twinx()
ax2.plot(ax.get_xticks(),SKS['sum_Sales Stage Change Count'],'--', marker='o', c='red', linewidth=6)
ax2.set_ylim(0,1.05*SKS["sum_Sales Stage Change Count"].max())
ax2.grid(True)
ax2.set_ylabel(r"Sum of sales", color='darkgrey',alpha=0.8, fontsize=20)
ax2.set_title("Sales by regions", fontsize=23, alpha=0.4)

plt.show()

Artykuł Perfect Plots: Combiplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
Perfect Plots: Violinplot https://sigmaquality.pl/data-plots/perfect-plots_-violinplot/ Thu, 31 Oct 2019 19:50:00 +0000 http://sigmaquality.pl/perfect-plots_-violinplot/ Feel free to read the code on GitHub In [1]: import pandas as pd import matplotlib.pyplot as plt import numpy as np import matplotlib.patches as [...]

Artykuł Perfect Plots: Violinplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>
In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches

Titanic disaster

Source of data: https://www.kaggle.com/shivamp629/traincsv

In [2]:
df = pd.read_csv('c:/1/kaggletrain.csv')
df.head()

In [3]:

SKS = df.pivot_table(index=['Sex','Age'], values = 'Fare', aggfunc='count').reset_index()
SKS.head()
Out[3]:
Sex Age Fare
0 female 0.75 2
1 female 1.00 2
2 female 2.00 6
3 female 3.00 2
4 female 4.00 5
In [4]:
fig, ax = plt.subplots(figsize=(6,5), dpi= 80)  
import seaborn as sns

# Draw Stripplot
  
sns.stripplot(SKS.Sex, SKS.Age, size=SKS.Fare*2, ax=ax)

# Decorations
plt.title('Counts Plot - Size of circle is bigger as bigger is subpopulation', fontsize=12)
plt.show()
In [5]:
# Draw Plot
plt.figure(figsize=(6,4), dpi= 80)
sns.violinplot(x=SKS.Sex, y=SKS.Age, data=df, scale='width', inner='quartile')

# Decoration
plt.title('Age of the Titanic passengers by sex', fontsize=12)
plt.show()
In [6]:
PKP = df.pivot_table(index=['Pclass','Sex','Age'], values = 'Fare', aggfunc='count').reset_index()
PKP.rename(columns={'Fare':'Count'}, inplace=True)
PKP.head()
Out[6]:
Pclass Sex Age Count
0 1 female 2.0 1
1 1 female 14.0 1
2 1 female 15.0 1
3 1 female 16.0 3
4 1 female 17.0 2
In [7]:
# Draw Plot
plt.figure(figsize=(6,4), dpi= 80)
sns.violinplot(x=PKP.Pclass, y=PKP.Age, data=df, scale='width', inner='quartile',  palette="husl")

# Decoration
plt.title('Age of the Titanic passengers by sex', fontsize=18)
plt.show()

https://yagisanatode.com/2019/08/06/google-apps-script-hexadecimal-color-codes-for-google-docs-sheets-and-slides-standart-palette/

green = ['#274e13','#6aa84f','#93c47d', '#b6d7a8','#d9ead3','#b7b7b7','#38761d'] 
cyan = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'] #cyan
yellow = ['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc'] #yellow
magenta = ['#4c1130','#a64d79','#c27ba0','#d5a6bd','#ead1dc','#741b47',] #magenta
colors = ['#e6b8af','#b6d7a8','#e06666','#747574','#ffd966','#ffcc99','#ea9999']
colors = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']
purple = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6'] #purple
blue = ['#cfe2f3','#9fc5e8','#6fa8dc'] #blue
colors = ['#d9ead3','#b6d7a8','#93c47d','#6aa84f']
grey = ['#000000', '#434343', '#666666', '#999999', '#b7b7b7', '#cccccc', '#d9d9d9','#efefef','#f3f3f3']
lightCornflower = ['#1c4587', '#1155cc', '#3c78d8', '#6d9eeb', '#a4c2f4', '#c9daf8', '#4a86e8', '#d9d9d9']

#colors = ['#ff0000','#434343','#666666','#999999','#b7b7b7','#cccccc','#d9d9d9','#efefef','#ffffff','#f3f3f3'] #=> niemieckie czasopismo
In [8]:
blue = ['#cfe2f3','#9fc5e8','#6fa8dc']
sns.violinplot(x=PKP.Pclass, y=PKP.Age, data=df, scale='width', inner='quartile',  palette=blue, alpha=0.1)
# Decoration
plt.title('Age of the Titanic passengers by sex', fontsize=18)
plt.show()
In [9]:
purple = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6']

sns.violinplot(x=PKP.Pclass, y=PKP.Age, data=df, scale='width', inner='quartile',  palette=purple)
sns.swarmplot(x=PKP.Pclass, y=PKP.Age, data=df, color='lightblue', alpha=0.9)
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1abd5251eb8>

WorldHappinessReport

In [10]:
df3 = pd.read_csv('c:/1/WorldHappinessReport.csv')
df3= df3[df3['Year']==2017]
df3.head(3)
Out[10]:
Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
330 330 Afghanistan Southern Asia 141.0 3.794 0.401477 0.581543 0.180747 0.106180 0.061158 0.311871 2.150801 2017.0
331 331 Albania Central and Eastern Europe 109.0 4.644 0.996193 0.803685 0.731160 0.381499 0.039864 0.201313 1.490442 2017.0
332 332 Algeria Middle East and Northern Africa 53.0 5.872 1.091864 1.146217 0.617585 0.233336 0.146096 0.069437 2.567604 2017.0
In [11]:
NIK = df3.pivot_table(index=['Region','Happiness Score'], values = 'Country', aggfunc='count').reset_index()
print(NIK.head())
REG = NIK['Region'].unique()
                       Region  Happiness Score  Country
0   Australia and New Zealand            7.284        1
1   Australia and New Zealand            7.314        1
2  Central and Eastern Europe            4.096        1
3  Central and Eastern Europe            4.286        1
4  Central and Eastern Europe            4.376        1
In [12]:
plt.figure(figsize=(16,4), dpi= 80)

cyan = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'] 
sns.violinplot(x=NIK.Region, y=NIK['Happiness Score'], data=df3, scale='width', inner='quartile',  palette=cyan)
sns.swarmplot(x=NIK.Region, y=NIK['Happiness Score'], data=df3, color='white', alpha=0.4)


plt.gca().set_xticklabels(REG, rotation=90, horizontalalignment= 'right', fontsize=18)
plt.title("Happiness Score by regions 2017", fontsize=25, alpha=0.4)
plt.ylabel('Happiness Score')
#plt.ylim(0, 37000)
plt.show()

Drinksbycountry

In [13]:
df4 = pd.read_csv('c:/1/drinksbycountry.csv')
df4.head(3)
Out[13]:
Unnamed: 0 country beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol continent
0 0 Afghanistan 0 0 0 0.0 Asia
1 1 Albania 89 132 54 4.9 Europe
2 2 Algeria 25 0 14 0.7 Africa
In [14]:
PKS = df4.pivot_table(index =['continent','total_litres_of_pure_alcohol'], values='country' , aggfunc='count').reset_index()
PKS.rename(columns={'country':'count'}, inplace=True)
PKS.head()
Out[14]:
continent total_litres_of_pure_alcohol count
0 Africa 0.0 3
1 Africa 0.1 2
2 Africa 0.2 2
3 Africa 0.3 1
4 Africa 0.4 1
In [15]:
plt.figure(figsize=(10,4), dpi= 280)

grey = ['#000000', '#434343', '#666666', '#999999', '#b7b7b7', '#cccccc', '#d9d9d9','#efefef','#f3f3f3']
sns.violinplot(x=PKS.continent, y=PKS['total_litres_of_pure_alcohol'], data=PKS, scale='width', inner='quartile',  palette=grey)
sns.swarmplot(x=PKS.continent, y=PKS['total_litres_of_pure_alcohol'], data=PKS, color='yellow', alpha=0.4)


#plt.gca().set_xticklabels(xxx, rotation=90, horizontalalignment= 'right', fontsize=14)
plt.title("Litres Of Pure Alcohol per person", fontsize=22, alpha=0.4)
plt.ylabel('counts')
#plt.ylim(0, 37000)
plt.show()

Imdbratings

In [16]:
df5 = pd.read_csv('c:/1/imdbratings.csv')
df5.head(3)
Out[16]:
Unnamed: 0 star_rating title content_rating genre duration actors_list
0 0 9.3 The Shawshank Redemption R Crime 142 [u’Tim Robbins’, u’Morgan Freeman’, u’Bob Gunt…
1 1 9.2 The Godfather R Crime 175 [u’Marlon Brando’, u’Al Pacino’, u’James Caan’]
2 2 9.1 The Godfather: Part II R Crime 200 [u’Al Pacino’, u’Robert De Niro’, u’Robert Duv…
In [17]:
SKO = df5.pivot_table(index=['genre','duration'], values='title', aggfunc='count').reset_index()
KOT = SKO['genre'].unique()
SKO.head()
Out[17]:
genre duration title
0 Action 80 1
1 Action 92 1
2 Action 93 2
3 Action 94 1
4 Action 98 1
In [18]:
plt.figure(figsize=(10,4), dpi= 280)

lightCornflower = ['#1c4587', '#1155cc', '#3c78d8', '#6d9eeb', '#a4c2f4', '#c9daf8', '#4a86e8', '#d9d9d9']
sns.violinplot(x=SKO.genre, y=SKO['duration'], data=SKO, scale='width', inner='quartile',  palette=lightCornflower, markers='d')
sns.swarmplot(x=SKO.genre, y=SKO['duration'], data=SKO, color='black', alpha=0.4)


plt.gca().set_xticklabels(KOT, rotation=90, horizontalalignment= 'right', fontsize=14, color='#45818e')
plt.title("Duration films by genre", fontsize=22, alpha=0.4)
plt.ylabel('Duration')
#plt.ylim(0, 37000)
plt.show()

Artykuł Perfect Plots: Violinplot pochodzi z serwisu THE DATA SCIENCE LIBRARY.

]]>