Perfect Plots: Waffle plot

pip install pywaffle

In [1]:
from pywaffle import Waffle
import squarify 
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Load the mpg data set and preview its first rows.
# NOTE(review): absolute local path; adjust to where the file lives on your machine.
mpg_csv = 'c:/1/mpg_ggplot2.txt'
df = pd.read_csv(mpg_csv)
df.head()
Out[2]:
manufacturer model displ year cyl trans drv cty hwy fl class
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
In [3]:
# Count the rows of each vehicle class.  The result replaces `df` with a
# two-column frame (class, counts), so reload the data before re-running
# this cell.
class_sizes = df.groupby('class').size()
df = class_sizes.reset_index(name='counts')
df
Out[3]:
class counts
0 2seater 5
1 compact 47
2 midsize 41
3 minivan 11
4 pickup 33
5 subcompact 35
6 suv 62
In [72]:
# Prepare Data
n_categories = df.shape[0]
# One colour per class, sampled from YlGnBu at positions i / n_categories.
colors = [plt.cm.YlGnBu(i / float(n_categories)) for i in range(n_categories)]

# Legend entries of the form "<class> (<count>)".  The two columns are paired
# directly: a plain itertuples() puts the row index at position 0, which
# produced labels such as "0 (2seater)" instead of "2seater (5)".
class_labels = [
    "{0} ({1})".format(name, count)
    for name, count in zip(df['class'], df['counts'])
]

# Draw Plot and Decorate
fig = plt.figure(
    FigureClass=Waffle,
    plots={
        '111': {
            'values': df['counts'],
            'labels': class_labels,
            # Anchor the legend just outside the right edge of the axes.
            'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.05, 1), 'fontsize': 12},
            'title': {'label': 'Vehicles by Class', 'loc': 'center', 'fontsize':28}
        },
    },
    rows=7,
    colors=colors,
    figsize=(16, 9)
)
In [5]:
# Display the number of categories computed in the plotting cell above.
n_categories
Out[5]:
7
In [6]:
# Display the RGBA tuples sampled from the colormap, one per category.
# NOTE(review): the recorded output runs from pale yellow through orange/red
# to dark purple, which does not look like YlGnBu — presumably stale output
# from an earlier run with another colormap; re-execute to confirm.
colors
Out[6]:
[(0.988362, 0.998364, 0.644924, 1.0),
 (0.981173, 0.759135, 0.156863, 1.0),
 (0.961293, 0.488716, 0.084289, 1.0),
 (0.832299, 0.283913, 0.257383, 1.0),
 (0.621685, 0.164184, 0.388781, 1.0),
 (0.397674, 0.083257, 0.433183, 1.0),
 (0.15585, 0.044559, 0.325338, 1.0)]

Titanic disaster

We want to find out which passengers had a chance of surviving, depending on the group they belonged to.

Source of data: https://www.kaggle.com/shivamp629/traincsv

In [7]:
# Load the Titanic passenger table and preview three rows.
# NOTE(review): absolute local path; adjust to where the file lives on your machine.
titanic_csv = 'c:/1/kaggletrain.csv'
df2 = pd.read_csv(titanic_csv)
df2.head(n=3)
Out[7]:
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
In [8]:
# Number of passengers in each ticket class (Pclass).
pclass_sizes = df2.groupby('Pclass').size()
dfTDK = pclass_sizes.reset_index(name='counts')
dfTDK
Out[8]:
Pclass counts
0 1 216
1 2 184
2 3 491

Palettes: https://matplotlib.org/examples/color/colormaps_reference.html

cmaps = [('Perceptually Uniform Sequential', [
'viridis', 'plasma', 'inferno', 'magma']),
('Sequential', [
'Greys', 'Purples', 'Blues', 'Greens', 'Oranges', 'Reds',
'YlOrBr', 'YlOrRd', 'OrRd', 'PuRd', 'RdPu', 'BuPu',
'GnBu', 'PuBu', 'YlGnBu', 'PuBuGn', 'BuGn', 'YlGn']),
('Sequential (2)', [
'binary', 'gist_yarg', 'gist_gray', 'gray', 'bone', 'pink',
'spring', 'summer', 'autumn', 'winter', 'cool', 'Wistia',
'hot', 'afmhot', 'gist_heat', 'copper']),
('Diverging', [
'PiYG', 'PRGn', 'BrBG', 'PuOr', 'RdGy', 'RdBu',
'RdYlBu', 'RdYlGn', 'Spectral', 'coolwarm', 'bwr', 'seismic']),
('Qualitative', [
'Pastel1', 'Pastel2', 'Paired', 'Accent',
'Dark2', 'Set1', 'Set2', 'Set3',
'tab10', 'tab20', 'tab20b', 'tab20c']),
('Miscellaneous', [
'flag', 'prism', 'ocean', 'gist_earth', 'terrain', 'gist_stern',
'gnuplot', 'gnuplot2', 'CMRmap', 'cubehelix', 'brg', 'hsv',
'gist_rainbow', 'rainbow', 'jet', 'nipy_spectral', 'gist_ncar'])]

In [26]:
# Prepare Data
n_categories = dfTDK.shape[0]
# One colour per ticket class, sampled from cubehelix at positions i / n_categories.
colors2 = [plt.cm.cubehelix(i / float(n_categories)) for i in range(n_categories)]

# Legend entries of the form "<Pclass> (<count>)".  The two columns are paired
# directly: a plain itertuples() puts the row index at position 0, which
# produced labels such as "0 (1)" instead of "1 (216)".
pclass_labels = [
    "{0} ({1})".format(pclass, count)
    for pclass, count in zip(dfTDK['Pclass'], dfTDK['counts'])
]

## palettes: https://matplotlib.org/examples/color/colormaps_reference.html
# Draw Plot and Decorate
fig = plt.figure(dpi=380, FigureClass=Waffle,
    plots={
        '111': {
            'values': dfTDK['counts'],
            'labels': pclass_labels,
            'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.0, 1), 'fontsize': 28},
            'title': {'label': 'Structure of passengers population of Titanic', 'loc': 'center', 'fontsize':68,'alpha':0.5}
        },
    },
    rows=10,
    colors=colors2,
    figsize=(28, 7)
)

Embarked: (C = Cherbourg, Q = Queenstown, S = Southampton)

In [10]:
# Spell out the embarkation port codes.
# Whole-value replacement is used instead of chained substring replaces:
# substring replacement depends on the order of the calls and is not
# idempotent (re-running the cell would turn 'Cherbourg' into
# 'Cherbourgherbourg').  Values that are not one of the three codes,
# including NaN and already-expanded names, are left untouched.
port_names = {'C': 'Cherbourg', 'Q': 'Queenstown', 'S': 'Southampton'}
df2['Embarked'] = df2['Embarked'].replace(port_names)
df2.sample(4)
Out[10]:
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
368 368 369 1 3 Jermyn, Miss. Annie female NaN 0 0 14313 7.7500 NaN Queenstown
871 871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 Southampton
125 125 126 1 3 Nicola-Yarred, Master. Elias male 12.0 1 0 2651 11.2417 NaN Cherbourg
462 462 463 0 1 Gee, Mr. Arthur H male 47.0 0 0 111320 38.5000 E63 Southampton
In [11]:
# Number of passengers per embarkation port.
port_sizes = df2.groupby('Embarked').size()
dfPKP = port_sizes.reset_index(name='counts')
dfPKP
Out[11]:
Embarked counts
0 Cherbourg 168
1 Queenstown 77
2 Southampton 644
In [17]:
# Prepare Data
n_categories = dfPKP.shape[0]
# One colour per port, sampled from bwr at positions i / n_categories.
colors2 = [plt.cm.bwr(i / float(n_categories)) for i in range(n_categories)]

# Legend entries of the form "<port> (<count>)".  The two columns are paired
# directly: a plain itertuples() puts the row index at position 0, which
# produced labels such as "0 (Cherbourg)" instead of "Cherbourg (168)".
port_labels = [
    "{0} ({1})".format(port, count)
    for port, count in zip(dfPKP['Embarked'], dfPKP['counts'])
]

## palettes: https://matplotlib.org/examples/color/colormaps_reference.html
# Draw Plot and Decorate
fig = plt.figure(dpi=380, FigureClass=Waffle,
    plots={
        '111': {
            'values': dfPKP['counts'],
            'labels': port_labels,
            'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.0, 1), 'fontsize': 28},
            'title': {'label': 'Place of embarking of passengers on the Titanic', 'loc': 'center', 'fontsize':58,'alpha':0.5}
        },
    },
    rows=10,
    colors=colors2,
    figsize=(28, 7)
)
In [41]:
# Load the bank data set and preview three rows.
# NOTE(review): absolute local path; adjust to where the file lives on your machine.
bank_csv = 'c:/1/bank.csv'
df3 = pd.read_csv(bank_csv)
df3.head(n=3)
Out[41]:
Unnamed: 0 Unnamed: 0.1 age job marital education default housing loan contact campaign pdays previous poutcome emp_var_rate cons_price_idx cons_conf_idx euribor3m nr_employed y
0 0 0 44 blue-collar married basic.4y unknown yes no cellular 1 999 0 nonexistent 1.4 93.444 -36.1 4.963 5228.1 0
1 1 1 53 technician married unknown no no no cellular 1 999 0 nonexistent -0.1 93.200 -42.0 4.021 5195.8 0
2 2 2 28 management single university.degree no yes no cellular 3 6 2 success -1.7 94.055 -39.8 0.729 4991.6 1

3 rows × 23 columns

In [48]:
# Rows per job: count the non-null 'Unnamed: 0' entries within each job group.
job_counts = df3.pivot_table(index='job', values='Unnamed: 0', aggfunc='count')
df_STS = job_counts.reset_index()
# Display only: the sorted view is not stored, so df_STS keeps its original row order.
df_STS.sort_values(by='Unnamed: 0', ascending=False)
Out[48]:
job Unnamed: 0
0 admin. 10422
1 blue-collar 9254
9 technician 6743
7 services 3969
4 management 2924
5 retired 1720
2 entrepreneur 1456
6 self-employed 1421
3 housemaid 1060
10 unemployed 1014
8 student 875
11 unknown 330
In [55]:
# Prepare Data
n_categories = df_STS.shape[0]
# One colour per job, sampled from bwr at positions i / n_categories.
colors2 = [plt.cm.bwr(i / float(n_categories)) for i in range(n_categories)]

# Legend entries of the form "<job> (<count>)".  The two columns are paired
# directly: a plain itertuples() puts the row index at position 0, which
# produced labels such as "0 (admin.)" instead of "admin. (10422)".
job_labels = [
    "{0} ({1})".format(job, count)
    for job, count in zip(df_STS['job'], df_STS['Unnamed: 0'])
]

## palettes: https://matplotlib.org/examples/color/colormaps_reference.html
# Draw Plot and Decorate
# NOTE(review): figsize=(118, 70) at dpi=180 requests a 21240 x 12600 pixel
# canvas; rendering is presumably slow and memory-hungry — confirm this is intended.
fig = plt.figure(dpi=180, FigureClass=Waffle,
    plots={
        '111': {
            'values': df_STS['Unnamed: 0'],
            'labels': job_labels,
            'legend': {'loc': 'upper left', 'bbox_to_anchor': (1.0, 1), 'fontsize': 128},
            'title': {'label': 'Structure of bank customers by occupation', 'loc': 'center', 'fontsize':158,'alpha':0.5}
        },
    },
    rows=150,
    colors=colors2,
    figsize=(118, 70)
)