Perfect Plots: H_line plot

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
import matplotlib.pylab as plt
from pylab import plot, show, subplot, specgram, imshow, savefig
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Imputer
import matplotlib.pyplot as plote

Bank marketing

Analysis of the categorical results. Source of data: https://archive.ics.uci.edu/ml/machine-learning-databases/00222/

In [2]:
# Load the bank-marketing extract and preview its first five rows.
# NOTE(review): absolute local path - the notebook only runs where this file exists.
df = pd.read_csv(filepath_or_buffer='c:/1/bank.csv')
df.head(n=5)
Out[2]:
Unnamed: 0 Unnamed: 0.1 age job marital education default housing loan contact campaign pdays previous poutcome emp_var_rate cons_price_idx cons_conf_idx euribor3m nr_employed y
0 0 0 44 blue-collar married basic.4y unknown yes no cellular 1 999 0 nonexistent 1.4 93.444 -36.1 4.963 5228.1 0
1 1 1 53 technician married unknown no no no cellular 1 999 0 nonexistent -0.1 93.200 -42.0 4.021 5195.8 0
2 2 2 28 management single university.degree no yes no cellular 3 6 2 success -1.7 94.055 -39.8 0.729 4991.6 1
3 3 3 39 services married high.school no no no cellular 2 999 0 nonexistent -1.8 93.075 -47.1 1.405 5099.1 0
4 4 4 55 retired married basic.4y no yes no cellular 1 3 1 success -2.9 92.201 -31.4 0.869 5076.2 1

5 rows × 23 columns

In [3]:
# Pairwise correlations between the numeric columns, with rows ordered by
# their correlation with the target column `y` (most negative first).
# Numeric/bool columns are selected explicitly so this also runs on
# pandas >= 2.0, where DataFrame.corr() raises on text columns instead of
# silently dropping them.
CORREL = df.select_dtypes(include=['number', 'bool']).corr().sort_values('y')
CORREL.index
Out[3]:
Index(['nr_employed', 'pdays', 'euribor3m', 'emp_var_rate', 'cons_price_idx',
       'campaign', 'Unnamed: 0', 'Unnamed: 0.1', 'age', 'cons_conf_idx',
       'previous', 'duration', 'y'],
      dtype='object')
In [4]:
# Horizontal bar chart of each row of CORREL against its correlation with `y`.
plt.figure(figsize=(10, 6))
CORREL['y'].plot.barh(color='red')
plt.title('Correlation with the result variable', fontsize=20)
plt.xlabel('Correlation level')
plt.ylabel('Continuous independent variables')
Out[4]:
Text(0, 0.5, 'Continuous independent variables')

Variables for the chart

In [5]:
# Row labels for the chart: the variable names in CORREL's sorted order.
lebel = CORREL.index
lebel
Out[5]:
Index(['nr_employed', 'pdays', 'euribor3m', 'emp_var_rate', 'cons_price_idx',
       'campaign', 'Unnamed: 0', 'Unnamed: 0.1', 'age', 'cons_conf_idx',
       'previous', 'duration', 'y'],
      dtype='object')
In [6]:
# Values for the chart: each variable's correlation with `y`.
data = CORREL.loc[:, 'y']
data
Out[6]:
nr_employed      -0.354678
pdays            -0.324914
euribor3m        -0.307771
emp_var_rate     -0.298334
cons_price_idx   -0.136211
campaign         -0.066357
Unnamed: 0       -0.006165
Unnamed: 0.1     -0.006165
age               0.030399
cons_conf_idx     0.054878
previous          0.230181
duration          0.405274
y                 1.000000
Name: y, dtype: float64
In [7]:
# Chart title, passed to plt.title() by the plotting code further down.
title = 'Correlation with the result variable'
In [8]:
# Draw the plot.
# figsize is given in inches and dpi in dots per inch, so an 8x4 inch figure
# at dpi=80 is rendered at 640x320 pixels.
plt.figure(figsize=(8, 4), dpi=80, facecolor='#f4cccc', edgecolor='yellow')

# One horizontal segment per variable, running from 0 to its correlation.
plt.hlines(y=lebel, xmin=0, xmax=data)

# Value label at the end of each segment: red and right-aligned for negative
# values, green and left-aligned otherwise.
for x, y in zip(data, lebel):
    plt.text(x, y, round(x, 2), horizontalalignment='right' if x < 0 else 'left',
             verticalalignment='center', fontdict={'color': '#ff0000' if x < 0 else '#38761d', 'size': 14})

# Decorations
plt.yticks(lebel, fontsize=12)
plt.title(title, fontdict={'size': 20})
plt.grid(linestyle='--', alpha=0.5)
plt.xlim(-1.0, 1.0)
plt.show()

Trigger: a reusable function that draws the chart

In [9]:
def Hlines(data, lebel, title, xlim=(-1.0, 1.0)):
    """Draw a horizontal-line chart with a value label at the end of each line.

    Parameters
    ----------
    data : sequence of numbers
        End point of each line; every line starts at 0. Values are rounded
        to two decimals for the text labels.
    lebel : sequence
        Y positions / tick labels, paired element-wise with ``data``
        (assumed to have the same length).
    title : str
        Figure title.
    xlim : tuple of (float, float), optional
        Lower and upper x-axis limits. Defaults to ``(-1.0, 1.0)``, the
        range previously hard-coded in this function.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # figsize is in inches and dpi in dots per inch: 8x4 in at dpi=80 is
    # rendered at 640x320 pixels.
    plt.figure(figsize=(8, 4), dpi=80, facecolor='#f4cccc', edgecolor='yellow')
    plt.hlines(y=lebel, xmin=0, xmax=data)

    # Negative values get a red label right-aligned at the line end,
    # the rest a green label left-aligned at the line end.
    for x, y in zip(data, lebel):
        plt.text(x, y, round(x, 2), horizontalalignment='right' if x < 0 else 'left',
                 verticalalignment='center', fontdict={'color': '#ff0000' if x < 0 else '#38761d', 'size': 14})

    plt.yticks(lebel, fontsize=12, color='#660000', alpha=0.9)
    plt.title(title, fontdict={'size': 20}, color='#660000', alpha=0.9)
    plt.grid(linestyle='--', alpha=0.6)
    plt.xlim(*xlim)  # x-axis limits
    plt.show()
In [10]:
# Rebuild the three inputs from CORREL, then draw the chart with Hlines.
data = CORREL['y']
lebel = CORREL.index
title = 'Correlation with the result variable'

Hlines(data=data, lebel=lebel, title=title)