In [1]:
import joypy
import pandas as pd
import matplotlib.pyplot as plt
## Car market analysis
Source of data: https://github.com/selva86/datasets/blob/master/mpg_ggplot2.csv
GSuite text and background palette (hexadecimal colour codes): https://yagisanatode.com/2019/08/06/google-apps-script-hexadecimal-color-codes-for-google-docs-sheets-and-slides-standart-palette/
# Load the car data set and preview its first rows.
# The path is absolute and machine-specific, so this only runs where that file exists.
# NOTE(review): the cited source is mpg_ggplot2.csv, while the file read here is
# autos.csv and later cells use the columns body_style / highway_mpg / city_mpg —
# confirm which data set this really is.
df = pd.read_csv('/home/wojciech/Pulpit/1/autos.csv')
df.head(n=5)
def N_plots(df, x1, x2, by, title, x_title):
    """Draw a joy plot (ridgeline plot) of two columns, one ridge per group.

    Parameters
    ----------
    df
        Data handed to ``joypy.joyplot`` (a pandas DataFrame in this notebook).
    x1, x2
        Labels of the two columns whose distributions are overlaid on each ridge.
    by
        Label of the column used to split the data into groups.
    title : str
        Title placed on the plot.
    x_title : str
        Label of the x axis.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # joyplot returns the figure it draws on, so the resolution is applied to
    # that figure instead of opening a separate, empty one with plt.figure().
    fig, axes = joypy.joyplot(
        df,
        column=[x1, x2],
        by=by,
        ylim='own',
        figsize=(12, 8),
        legend=True,
        color=['#f4cccc', '#0c343d'],
        alpha=0.4,
    )
    fig.set_dpi(380)

    # Alternative two-colour palettes that can be passed as `color`:
    #   ['#76a5af', '#134f5c']
    #   ['#f4cccc', '#0c343d']
    #   ['#a4c2f4', '#1c4587']
    #   ['#e06666', '#d9d9d9']
    #   ['#e06666', '#434343']
    #   ['#b6d7a8', '#6aa84f']

    # Decoration
    plt.title(title, fontsize=32, color='#d0e0e3', alpha=0.9)
    # rc settings are global: they stay in force for figures drawn afterwards.
    plt.rc("font", size=20)
    plt.xlabel(x_title, fontsize=16, color='darkred', alpha=1)

    # The call parentheses were missing, so the figure was never shown explicitly.
    plt.show()
# Preview of the three columns that the plot below relies on.
df4 = df[['body_style', 'highway_mpg', 'city_mpg']]
df4.head(n=5)

# Arguments of the joy plot: highway vs. city mileage, one ridge per body style.
x1, x2 = 'highway_mpg', 'city_mpg'
by = 'body_style'
title, x_title = 'Fuel consumption by body style', 'Fuel consumption'
N_plots(df, x1, x2, by, title, x_title)
## Joyplot drawn with a class
class N_plot:
    """Joy plot (ridgeline plot) of two columns, one ridge per group.

    The constructor only stores its arguments; ``plot()`` draws the figure.
    """

    def __init__(self, df, x1, x2, by, title, x_title):
        """Store the plot inputs.

        Parameters
        ----------
        df
            Data handed to ``joypy.joyplot``.
        x1, x2
            Labels of the two columns overlaid on each ridge.
        by
            Label of the column used to split the data into groups.
        title : str
            Title placed on the plot.
        x_title : str
            Label of the x axis.
        """
        self.df = df
        self.x1 = x1
        self.x2 = x2
        self.by = by
        self.title = title
        self.x_title = x_title

    def plot(self):
        """Draw the joy plot from the values stored on this instance.

        Only instance attributes are read, so the result does not depend on
        module-level variables that happen to share the parameter names.
        """
        # joyplot returns the figure it draws on, so the resolution is applied
        # to that figure instead of opening a separate, empty one.
        fig, axes = joypy.joyplot(
            self.df,
            column=[self.x1, self.x2],
            by=self.by,
            ylim='own',
            figsize=(12, 8),
            legend=True,
            color=['#e06666', '#d9d9d9'],
            alpha=0.4,
        )
        fig.set_dpi(380)
        plt.title(self.title, fontsize=32, color='#d0e0e3', alpha=0.9)
        # rc settings are global: they stay in force for figures drawn afterwards.
        plt.rc("font", size=20)
        plt.xlabel(self.x_title, fontsize=16, color='darkred', alpha=1)
# Already imported in the first cell; kept so this cell can also be run on its own.
import matplotlib.pyplot as plt

# The bare plt.figure(dpi=380) that used to sit here only opened an empty
# figure that nothing was drawn on, so it has been removed.

# Alternative two-colour palettes for the joy plot:
#   ['#76a5af', '#134f5c']
#   ['#f4cccc', '#0c343d']
#   ['#a4c2f4', '#1c4587']
#   ['#e06666', '#d9d9d9']
#   ['#e06666', '#434343']
#   ['#b6d7a8', '#6aa84f']

# Car data again, this time drawn through the N_plot class.
# `df` still refers to the car data loaded earlier.
x1 = 'highway_mpg'
x2 = 'city_mpg'
by = 'body_style'
title = 'Fuel consumption by body style'
x_title = 'Fuel consumption'
kot = N_plot(df, x1, x2, by, title, x_title)
kot.plot()
## Titanic disaster
We want to find out which passengers had a chance of surviving, depending on the group they belonged to.
Source of data: https://www.kaggle.com/shivamp629/traincsv
# Load the Titanic passenger data (absolute, machine-specific path).
df2 = pd.read_csv('/home/wojciech/Pulpit/1/kaggletrain.csv')
df2.head(n=3)
# Quick look at the column that is plotted below.
df2['Age'].head(n=5)
# Spread Age into one column per Sex value; every (Name, Pclass) pair becomes
# a row, and reset_index() turns Name and Pclass back into ordinary columns.
AA = (
    df2
    .pivot_table(values='Age', index=['Name', 'Pclass'], columns='Sex')
    .reset_index()
)
AA.head(n=5)

# Joy plot of female vs. male age, one ridge per passenger class.
df = AA
x1, x2 = 'female', 'male'
by = 'Pclass'
title = 'Titanic disaster: age distribution of casualties by the class'
x_title = 'Age of passengers'
pks = N_plot(df, x1, x2, by, title, x_title)
pks.plot()
# Same reshaping as before, but keyed by the Survived flag instead of the class.
BB = (
    df2
    .pivot_table(values='Age', index=['Name', 'Survived'], columns='Sex')
    .reset_index()
)
BB.head(n=5)

# Joy plot of female vs. male age, one ridge per Survived value.
df = BB
x1, x2 = 'female', 'male'
by = 'Survived'
title = 'Titanic disaster: age distribution of casualties by the Survived'
x_title = 'Age of passengers'
ZHP = N_plot(df, x1, x2, by, title, x_title)
ZHP.plot()
## Drinks by country
Source of data: https://github.com/fivethirtyeight/data/blob/master/alcohol-consumption/drinks.csv
# Load the drinks-by-country data (absolute, machine-specific path) and preview it.
df3 = pd.read_csv('/home/wojciech/Pulpit/1/drinksbycountry.csv')
df3.head(n=5)
class N_plot3:
    """Joy plot (ridgeline plot) of three columns, one ridge per group.

    The constructor only stores its arguments; ``plot()`` draws the figure.
    """

    def __init__(self, df, x1, x2, x3, by, title, x_title):
        """Store the plot inputs.

        Parameters
        ----------
        df
            Data handed to ``joypy.joyplot``.
        x1, x2, x3
            Labels of the three columns overlaid on each ridge.
        by
            Label of the column used to split the data into groups.
        title : str
            Title placed on the plot.
        x_title : str
            Label of the x axis.
        """
        self.df = df
        self.x1 = x1
        self.x2 = x2
        self.x3 = x3
        self.by = by
        self.title = title
        self.x_title = x_title

    def plot(self):
        """Draw the joy plot from the values stored on this instance.

        Only instance attributes are read, so the result does not depend on
        module-level variables that happen to share the parameter names.
        """
        # joyplot returns the figure it draws on, so the resolution is applied
        # to that figure instead of opening a separate, empty one.
        fig, axes = joypy.joyplot(
            self.df,
            column=[self.x1, self.x2, self.x3],
            by=self.by,
            ylim='own',
            figsize=(12, 8),
            legend=True,
            color=['#b6d7a8', '#1c4587', '#6aa84f'],
            alpha=0.4,
        )
        fig.set_dpi(380)
        plt.title(self.title, fontsize=32, color='#d0e0e3', alpha=0.9)
        # rc settings are global: they stay in force for figures drawn afterwards.
        plt.rc("font", size=20)
        plt.xlabel(self.x_title, fontsize=16, color='darkred', alpha=1)
# Already imported in the first cell; kept so this cell can also be run on its own.
import matplotlib.pyplot as plt

# The bare plt.figure(dpi=380) that used to sit here only opened an empty
# figure that nothing was drawn on, so it has been removed.

# Alternative two-colour palettes for the joy plots:
#   ['#76a5af', '#134f5c']
#   ['#f4cccc', '#0c343d']
#   ['#a4c2f4', '#1c4587']
#   ['#e06666', '#d9d9d9']
#   ['#e06666', '#434343']
#   ['#b6d7a8', '#6aa84f']

# Beer, spirit and wine servings compared per continent.
df = df3
x1 = 'beer_servings'
x2 = 'spirit_servings'
x3 = 'wine_servings'
by = 'continent'
title = 'Alcohol consumption by continents'
x_title = 'The level o consumptions'
PKO = N_plot3(df, x1, x2, x3, by, title, x_title)
PKO.plot()
## World Happiness Report
Source of data: https://data.world/promptcloud/world-happiness-report-2019
# Load the World Happiness Report data (absolute, machine-specific path).
# Note that this rebinds df4, which earlier held a slice of the car data.
df4 = pd.read_csv('/home/wojciech/Pulpit/1/WorldHappinessReport.csv')
df4.head(n=3)
# How many rows each year contributes.
df4['Year'].value_counts()

# Keep only the rows for 2017.
CC = df4[df4['Year'].eq(2017)]
CC.head(n=3)

# Joy plot of the two indicators, one ridge per region.
df = CC
x1, x2 = 'Freedom', 'Trust (Government Corruption)'
by = 'Region'
title, x_title = 'World Happiness Report', 'Indicator'
ZNP = N_plot(df, x1, x2, by, title, x_title)
ZNP.plot()
## Banking marketing
Analysis of the categorical results.
Source of data: https://archive.ics.uci.edu/ml/machine-learning-databases/00222/
# Load the bank marketing data (absolute, machine-specific path).
df5 = pd.read_csv('/home/wojciech/Pulpit/1/bank.csv')
df5.head(n=3)

# Spread age into one column per value of y; every ('Unnamed: 0', marital)
# pair becomes a row, and reset_index() turns both keys back into columns.
FF = (
    df5
    .pivot_table(values='age', index=['Unnamed: 0', 'marital'], columns='y')
    .reset_index()
)
FF.head(n=5)

# Joy plot of customer age for the two y columns (labelled 0 and 1),
# one ridge per marital status.
df = FF
x1, x2 = 0, 1
by = 'marital'
title, x_title = 'Customer age structure', 'customer age'
KLD = N_plot(df, x1, x2, by, title, x_title)
KLD.plot()