Perfect Plots: Violinplot

October 31, 2019 admin Data plots 0

violin

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

Titanic disaster

Source of data: https://www.kaggle.com/shivamp629/traincsv

In [2]:

# Read the Titanic passenger CSV into `df` and preview the first rows.
titanic_csv_path = 'c:/1/kaggletrain.csv'
df = pd.read_csv(titanic_csv_path)
df.head()

In [3]:

# Count the non-null 'Fare' entries for every (Sex, Age) pair.
# The resulting count column keeps the name 'Fare'.
sex_age_counts = df.pivot_table(values='Fare', index=['Sex', 'Age'], aggfunc='count')
SKS = sex_age_counts.reset_index()
SKS.head()

Out[3]:

	Sex	Age	Fare
0	female	0.75	2
1	female	1.00	2
2	female	2.00	6
3	female	3.00	2
4	female	4.00	5

In [4]:

# Counts plot: one marker per (Sex, Age) pair, with the marker size driven by
# how many passengers fall into that pair.
fig, ax = plt.subplots(figsize=(6, 5), dpi=80)

# `stripplot` takes a single scalar `size` for all markers, so passing a
# per-row Series cannot map counts to individual points. `scatterplot` has a
# `size` semantic that scales each marker from its own row.
# SKS['Fare'] holds the per-pair passenger count (aggfunc='count'), not a fare.
sns.scatterplot(
    x='Sex',
    y='Age',
    size='Fare',
    sizes=(20, 400),   # smallest / largest marker area, in points^2
    data=SKS,
    alpha=0.6,         # markers share one x position per sex, so let overlaps show
    legend=False,
    ax=ax,
)

# Decorations
ax.set_title('Counts Plot - Size of circle is bigger as bigger is subpopulation', fontsize=12)
plt.show()

In [5]:

# Violin plot of passenger age, split by sex.
fig, ax = plt.subplots(figsize=(6, 4), dpi=80)

# x and y are given as column names so they are looked up in `data=df`, i.e.
# every passenger contributes to the density. Passing the pivot-table columns
# (SKS.Sex / SKS.Age) instead would plot each distinct age once per sex,
# regardless of how many passengers share it.
sex_order = sorted(df['Sex'].dropna().unique())  # stable, alphabetical category order
sns.violinplot(x='Sex', y='Age', data=df, order=sex_order,
               scale='width', inner='quartile', ax=ax)

# Decoration
ax.set_title('Age of the Titanic passengers by sex', fontsize=12)
plt.show()

In [6]:

# Count the non-null 'Fare' entries for every (Pclass, Sex, Age) combination
# and expose that count under the column name 'Count'.
PKP = (
    df.pivot_table(values='Fare', index=['Pclass', 'Sex', 'Age'], aggfunc='count')
    .reset_index()
    .rename(columns={'Fare': 'Count'})
)
PKP.head()

Out[6]:

	Pclass	Sex	Age	Count
0	1	female	2.0	1
1	1	female	14.0	1
2	1	female	15.0	1
3	1	female	16.0	3
4	1	female	17.0	2

In [7]:

# Violin plot of passenger age, split by passenger class, using the "husl" palette.
fig, ax = plt.subplots(figsize=(6, 4), dpi=80)

# Column names are resolved against `data=df`, so each passenger is counted.
# Passing PKP.Pclass / PKP.Age would plot the de-duplicated pivot rows instead.
sns.violinplot(x='Pclass', y='Age', data=df,
               scale='width', inner='quartile', palette="husl", ax=ax)

# Decoration: the x axis is the passenger class, so the title says so.
ax.set_title('Age of the Titanic passengers by class', fontsize=18)
plt.show()

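Colour palettes — the hex codes below are taken from the Google Docs/Sheets/Slides standard palette: https://yagisanatode.com/2019/08/06/google-apps-script-hexadecimal-color-codes-for-google-docs-sheets-and-slides-standart-palette/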

# Hex colour palettes, one list per hue family. Each list can be passed as the
# `palette=` argument of the seaborn plots in this notebook.
green = ['#274e13','#6aa84f','#93c47d', '#b6d7a8','#d9ead3','#b7b7b7','#38761d']
cyan = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3']
yellow = ['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc']
magenta = ['#4c1130','#a64d79','#c27ba0','#d5a6bd','#ead1dc','#741b47']
purple = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6']
blue = ['#cfe2f3','#9fc5e8','#6fa8dc']
grey = ['#000000', '#434343', '#666666', '#999999', '#b7b7b7', '#cccccc', '#d9d9d9','#efefef','#f3f3f3']
lightCornflower = ['#1c4587', '#1155cc', '#3c78d8', '#6d9eeb', '#a4c2f4', '#c9daf8', '#4a86e8', '#d9d9d9']

# `colors` used to be assigned three times in a row, so only the last list was
# ever visible. The two overwritten palettes are kept under their own names;
# `colors` itself still ends up with the same value as before.
colors_mixed = ['#e6b8af','#b6d7a8','#e06666','#747574','#ffd966','#ffcc99','#ea9999']
colors_green_to_cyan = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']
colors = ['#d9ead3','#b6d7a8','#93c47d','#6aa84f']

# Alternative: red accent on a greyscale ramp ("German magazine" look).
#colors = ['#ff0000','#434343','#666666','#999999','#b7b7b7','#cccccc','#d9d9d9','#efefef','#ffffff','#f3f3f3']

In [8]:

# Violin plot of passenger age by passenger class, coloured with a three-step blue palette.
blue = ['#cfe2f3','#9fc5e8','#6fa8dc']

fig, ax = plt.subplots()
# Column names are resolved against `data=df`, so each passenger is counted.
# Passing PKP.Pclass / PKP.Age would plot the de-duplicated pivot rows instead.
# NOTE(review): `alpha` is not a documented violinplot parameter; whether it is
# forwarded to the drawn violins appears to depend on the seaborn version — confirm.
sns.violinplot(x='Pclass', y='Age', data=df,
               scale='width', inner='quartile', palette=blue, alpha=0.1, ax=ax)

# Decoration: the x axis is the passenger class, so the title says so.
ax.set_title('Age of the Titanic passengers by class', fontsize=18)
plt.show()

In [9]:

# Violin plot of passenger age by class with the individual passengers overlaid as a swarm.
purple = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6']

fig, ax = plt.subplots()
# Both layers read their columns from `data=df` and draw on the same axes, so
# the swarm points are exactly the observations behind each violin.
sns.violinplot(x='Pclass', y='Age', data=df,
               scale='width', inner='quartile', palette=purple, ax=ax)
# Smaller markers than the default, because one point is drawn per passenger.
sns.swarmplot(x='Pclass', y='Age', data=df,
              color='lightblue', alpha=0.9, size=3, ax=ax)

# Decoration; plt.show() also keeps the Axes repr out of the cell output.
ax.set_title('Age of the Titanic passengers by class', fontsize=18)
plt.show()

Out[9]:

<matplotlib.axes._subplots.AxesSubplot at 0x1abd5251eb8>

World Happiness Report

In [10]:

# Read the World Happiness Report CSV and keep only the rows whose Year is 2017.
happiness_csv_path = 'c:/1/WorldHappinessReport.csv'
happiness_all_years = pd.read_csv(happiness_csv_path)
is_2017 = happiness_all_years['Year'] == 2017
df3 = happiness_all_years[is_2017]
df3.head(3)

Out[10]:

	Unnamed: 0	Country	Region	Happiness Rank	Happiness Score	Economy (GDP per Capita)	Family	Health (Life Expectancy)	Freedom	Trust (Government Corruption)	Generosity	Dystopia Residual	Year
330	330	Afghanistan	Southern Asia	141.0	3.794	0.401477	0.581543	0.180747	0.106180	0.061158	0.311871	2.150801	2017.0
331	331	Albania	Central and Eastern Europe	109.0	4.644	0.996193	0.803685	0.731160	0.381499	0.039864	0.201313	1.490442	2017.0
332	332	Algeria	Middle East and Northern Africa	53.0	5.872	1.091864	1.146217	0.617585	0.233336	0.146096	0.069437	2.567604	2017.0

In [11]:

# Count the non-null 'Country' entries for every (Region, Happiness Score) pair.
# The count column keeps the name 'Country'.
NIK = df3.pivot_table(index=['Region','Happiness Score'], values = 'Country', aggfunc='count').reset_index()

# Distinct region names, in the order they occur in NIK.
REG = NIK['Region'].unique()

# Last expression of the cell, so the notebook renders the frame as a table
# instead of printing its plain-text repr.
NIK.head()

                       Region  Happiness Score  Country
0   Australia and New Zealand            7.284        1
1   Australia and New Zealand            7.314        1
2  Central and Eastern Europe            4.096        1
3  Central and Eastern Europe            4.286        1
4  Central and Eastern Europe            4.376        1

In [12]:

# Violin + swarm plot of the 2017 happiness score of each country, grouped by region.
fig, ax = plt.subplots(figsize=(16, 4), dpi=80)

cyan = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3']

# Fix the category order explicitly so both layers agree and the regions appear alphabetically.
region_order = sorted(df3['Region'].dropna().unique())

# Column names are resolved against `data=df3`, so there is one observation per
# country row. Passing NIK.Region / NIK['Happiness Score'] would plot the pivot
# rows instead and leave `data=` unused.
sns.violinplot(x='Region', y='Happiness Score', data=df3, order=region_order,
               scale='width', inner='quartile', palette=cyan, ax=ax)
sns.swarmplot(x='Region', y='Happiness Score', data=df3, order=region_order,
              color='white', alpha=0.4, ax=ax)

# Restyle the tick labels seaborn already placed rather than overwriting their
# text from a separate array, which would silently mislabel the violins if the
# two orders ever differed.
plt.setp(ax.get_xticklabels(), rotation=90, horizontalalignment='right', fontsize=18)
ax.set_title("Happiness Score by regions 2017", fontsize=25, alpha=0.4)
ax.set_ylabel('Happiness Score')
plt.show()

Drinks by country

In [13]:

# Read the drinks-by-country CSV into `df4` and preview the first three rows.
drinks_csv_path = 'c:/1/drinksbycountry.csv'
df4 = pd.read_csv(drinks_csv_path)
df4.head(3)

Out[13]:

	Unnamed: 0	country	beer_servings	spirit_servings	wine_servings	total_litres_of_pure_alcohol	continent
0	0	Afghanistan	0	0	0	0.0	Asia
1	1	Albania	89	132	54	4.9	Europe
2	2	Algeria	25	0	14	0.7	Africa

In [14]:

# Count the non-null 'country' entries for every
# (continent, total_litres_of_pure_alcohol) pair and name that column 'count'.
PKS = (
    df4.pivot_table(values='country', index=['continent', 'total_litres_of_pure_alcohol'], aggfunc='count')
    .reset_index()
    .rename(columns={'country': 'count'})
)
PKS.head()

Out[14]:

	continent	total_litres_of_pure_alcohol	count
0	Africa	0.0	3
1	Africa	0.1	2
2	Africa	0.2	2
3	Africa	0.3	1
4	Africa	0.4	1

In [15]:

# Violin + swarm plot of the alcohol-consumption values in PKS, grouped by continent.
fig, ax = plt.subplots(figsize=(10, 4), dpi=280)

grey = ['#000000', '#434343', '#666666', '#999999', '#b7b7b7', '#cccccc', '#d9d9d9','#efefef','#f3f3f3']

# NOTE(review): PKS has one row per distinct (continent, litres) value, so
# countries sharing a value are drawn once. If the per-country distribution is
# wanted, plot df4 instead — confirm intent.
sns.violinplot(x='continent', y='total_litres_of_pure_alcohol', data=PKS,
               scale='width', inner='quartile', palette=grey, ax=ax)
sns.swarmplot(x='continent', y='total_litres_of_pure_alcohol', data=PKS,
              color='yellow', alpha=0.4, ax=ax)

ax.set_title("Litres Of Pure Alcohol per person", fontsize=22, alpha=0.4)
# The y axis shows total_litres_of_pure_alcohol, not a count.
ax.set_ylabel('Litres of pure alcohol')
plt.show()

IMDb ratings

In [16]:

# Read the IMDb ratings CSV into `df5` and preview the first three rows.
imdb_csv_path = 'c:/1/imdbratings.csv'
df5 = pd.read_csv(imdb_csv_path)
df5.head(3)

Out[16]:

	Unnamed: 0	star_rating	title	content_rating	genre	duration	actors_list
0	0	9.3	The Shawshank Redemption	R	Crime	142	[u’Tim Robbins’, u’Morgan Freeman’, u’Bob Gunt…
1	1	9.2	The Godfather	R	Crime	175	[u’Marlon Brando’, u’Al Pacino’, u’James Caan’]
2	2	9.1	The Godfather: Part II	R	Crime	200	[u’Al Pacino’, u’Robert De Niro’, u’Robert Duv…

In [17]:

# Count the non-null 'title' entries for every (genre, duration) pair.
# The count column keeps the name 'title'.
genre_duration_counts = df5.pivot_table(values='title', index=['genre', 'duration'], aggfunc='count')
SKO = genre_duration_counts.reset_index()

# Distinct genre names, in the order they occur in SKO.
KOT = SKO['genre'].unique()
SKO.head()

Out[17]:

	genre	duration	title
0	Action	80	1
1	Action	92	1
2	Action	93	2
3	Action	94	1
4	Action	98	1

In [18]:

# Violin + swarm plot of the film durations in SKO, grouped by genre.
fig, ax = plt.subplots(figsize=(10, 4), dpi=280)

lightCornflower = ['#1c4587', '#1155cc', '#3c78d8', '#6d9eeb', '#a4c2f4', '#c9daf8', '#4a86e8', '#d9d9d9']

# `markers='d'` was dropped: it is not a violinplot parameter (violins have no
# markers), so it was either ignored or rejected depending on the seaborn version.
# NOTE(review): SKO has one row per distinct (genre, duration) value, so films
# sharing a duration are drawn once. Plot df5 instead if the per-film
# distribution is wanted — confirm intent.
sns.violinplot(x='genre', y='duration', data=SKO,
               scale='width', inner='quartile', palette=lightCornflower, ax=ax)
sns.swarmplot(x='genre', y='duration', data=SKO, color='black', alpha=0.4, ax=ax)

# Restyle the tick labels seaborn already placed rather than overwriting their
# text from a separate array, which would silently mislabel the violins if the
# two orders ever differed.
plt.setp(ax.get_xticklabels(), rotation=90, horizontalalignment='right',
         fontsize=14, color='#45818e')
ax.set_title("Duration films by genre", fontsize=22, alpha=0.4)
ax.set_ylabel('Duration')
plt.show()

Copyright © 2024 | WordPress Theme by MH Themes