In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import matplotlib.pylab as plt
from pylab import plot, show, subplot, specgram, imshow, savefig
from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import Imputer
import matplotlib.pyplot as plote
Banking marketing
Analysis of the categorical result variable `y`: how strongly the other variables correlate with it. Data source: https://archive.ics.uci.edu/ml/machine-learning-databases/00222/
In [2]:
# Load the bank data set into a DataFrame and preview its first five rows.
# NOTE(review): the path is an absolute, machine-specific location; the notebook
# only runs where this exact file exists.
df = pd.read_csv(filepath_or_buffer='c:/1/bank.csv')
df.head(n=5)
Out[2]:
In [3]:
# Pairwise correlation matrix of the numeric and boolean columns, with the rows
# ordered by their correlation with the result column 'y' (ascending).
# The dtypes are selected explicitly because DataFrame.corr() stopped dropping
# non-numeric columns silently in pandas 2.0 and raises on them instead.
CORREL = df.select_dtypes(include=['number', 'bool']).corr().sort_values('y')
# Show the variable names in the sorted order; they become the chart labels below.
CORREL.index
Out[3]:
In [4]:
# Horizontal bar chart of every variable's correlation with the result column 'y'.
plt.figure(figsize=(10, 6))
CORREL['y'].plot.barh(color='red')
plt.title('Correlation with the result variable', fontsize=20)
plt.xlabel('Correlation level')
plt.ylabel('Continuous independent variables')
Out[4]:
Variables for the chart
In [5]:
# Row labels of the sorted correlation matrix; used as the y positions of the chart.
lebel = CORREL.index
lebel
Out[5]:
In [6]:
# Correlation of each variable with 'y', in the same order as the row labels.
data = CORREL.loc[:, 'y']
data
Out[6]:
In [7]:
# Heading shown above the correlation chart.
title = "Correlation with the result variable"
In [8]:
# Draw the plot. figsize is given in inches, so with dpi=80 the 8x4 figure is
# rendered at 640x320 pixels.
plt.figure(figsize=(8,4), dpi= 80, facecolor='#f4cccc', edgecolor='yellow')
# One horizontal line per variable, running from 0 to its correlation value.
plt.hlines(y=lebel, xmin=0, xmax=data)
# Write the rounded value at the end of each line: negative values in red to the
# left of the line end, non-negative values in green to the right of it.
for x, y in zip(data, lebel):
    plt.text(x, y, round(x, 2),
             horizontalalignment='right' if x < 0 else 'left',
             verticalalignment='center',
             fontdict={'color': '#ff0000' if x < 0 else '#38761d', 'size': 14})
# Decorations
plt.yticks(lebel, fontsize=12)
plt.title(title, fontdict={'size':20})
plt.grid(linestyle='--', alpha=0.5)
plt.xlim(-1.0, 1.0)  # correlations lie in [-1, 1]
plt.show()
In [9]:
def Hlines(data, lebel, title, figsize=(8, 4), xlim=(-1.0, 1.0)):
    """Draw a horizontal-line chart with one annotated line per label.

    Each line starts at x=0 and ends at the corresponding value in ``data``.
    The value, rounded to two decimals, is written at the line end: in red and
    right-aligned for negative values, in green and left-aligned otherwise.
    The figure is shown with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    data : iterable of numbers
        Line end positions on the x axis (the notebook passes ``CORREL['y']``).
    lebel : iterable
        y positions / tick labels, one per entry of ``data`` and in the same
        order (the notebook passes ``CORREL.index``).
    title : str
        Text of the chart title.
    figsize : tuple of float, optional
        Figure width and height in inches. With the fixed dpi of 80 the default
        (8, 4) is rendered at 640x320 pixels.
    xlim : tuple of float, optional
        Lower and upper limit of the x axis. The default (-1.0, 1.0) matches
        the range of correlation coefficients.
    """
    plt.figure(figsize=figsize, dpi=80, facecolor='#f4cccc', edgecolor='yellow')
    plt.hlines(y=lebel, xmin=0, xmax=data)
    for value, name in zip(data, lebel):
        is_negative = value < 0
        # Place the text on the outer side of the line end so it does not
        # overlap the line itself.
        plt.text(value, name, round(value, 2),
                 horizontalalignment='right' if is_negative else 'left',
                 verticalalignment='center',
                 fontdict={'color': '#ff0000' if is_negative else '#38761d',
                           'size': 14})
    plt.yticks(lebel, fontsize=12, color='#660000', alpha=0.9)
    plt.title(title, fontdict={'size': 20}, color='#660000', alpha=0.9)
    plt.grid(linestyle='--', alpha=0.6)
    plt.xlim(*xlim)  # axis limits
    plt.show()
In [10]:
# Rebuild the three chart inputs from the sorted correlation matrix and draw the
# chart through the reusable helper.
title = 'Correlation with the result variable'
lebel = CORREL.index
data = CORREL['y']
Hlines(data=data, lebel=lebel, title=title)

