Perfect Plots: Bar plots

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches

Car statistics

In [2]:
# Prepare Data: load the raw mpg table
df = pd.read_csv('c:/1/mpg_ggplot2.txt')

# Count rows per manufacturer and order them from most to fewest vehicles.
# The ordering matters: the bar chart draws the rows in exactly this order.
# The trailing reset_index() renumbers the rows and keeps the pre-sort
# position in an 'index' column.
df2 = (
    df
    .pivot_table(index='manufacturer', values='model', aggfunc='count')
    .reset_index()
    .rename(columns={'model': 'counts'})
    .sort_values('counts', ascending=False)
    .reset_index()
)
df2
Out[2]:
index manufacturer counts
0 2 dodge 37
1 13 toyota 34
2 14 volkswagen 27
3 3 ford 25
4 1 chevrolet 19
5 0 audi 18
6 5 hyundai 14
7 12 subaru 14
8 10 nissan 13
9 4 honda 9
10 6 jeep 8
11 11 pontiac 5
12 7 land rover 4
13 9 mercury 4
14 8 lincoln 3
In [3]:
# Preview the first three rows of the raw mpg table
df.head(n=3)
Out[3]:
manufacturer model displ year cyl trans drv cty hwy fl class
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
In [4]:
# Number of rows for every (hwy, cty) combination; the count lands in 'counts'
KAT = (
    df
    .pivot_table(index=['hwy', 'cty'], values='model', aggfunc='count')
    .reset_index()
    .rename(columns={'model': 'counts'})
)
KAT.head()
Out[4]:
hwy cty counts
0 12 9 5
1 14 11 2
2 15 11 10
3 16 11 3
4 16 12 2

Stripplot

The more points overlap at a position, the bigger the circle drawn there.

In [26]:
import seaborn as sns

# Draw Stripplot
fig, ax = plt.subplots(figsize=(8, 6), dpi=280)
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` as a
# positional argument, so the positional form raises TypeError there.
# NOTE(review): `size` receives a per-row Series (2 x overlap count); confirm
# that the installed seaborn/matplotlib versions accept a non-scalar size here.
sns.stripplot(x=KAT.cty, y=KAT.hwy, size=KAT.counts * 2, ax=ax)

# Decorations
plt.title('Counts Plot - Size of circle is bigger as more points overlap', fontsize=12)
plt.show()

Plot Bars

In [6]:
# Plot Bars: one bar per manufacturer, drawn in the row order of df2
plt.figure(figsize=(10, 4), dpi=280)
plt.bar(df2['manufacturer'], df2['counts'], color=['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc'], alpha=0.4, width=.5)
# Print each count just above its bar; the text is anchored at x = i, the
# position of the i-th bar on the categorical axis.
for i, val in enumerate(df2['counts'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
# Rotate the tick labels that plt.bar() already created. Overwriting them with
# set_xticklabels() without fixing the tick positions first makes matplotlib
# warn and can attach labels to the wrong ticks.
plt.xticks(rotation=90, horizontalalignment='right')
plt.title("Number of Vehicles by Manufacturers", fontsize=14)
plt.ylabel('Vehicles')
plt.ylim(0, 45)
plt.show()

Trigger

Dane do wykresu
x1 opis
x2 dane liczbowe

In [7]:
# Inputs for bar1: the category labels and the numeric value of each bar
name = df2['manufacturer']
x = df2['counts']
In [8]:
# Axis label and figure title handed to bar1
ylabel = 'Vehicles'
title = 'Number of Vehicles by Manufacturers'
In [9]:
def bar1(name, x, ylabel, title, ymax=45):
    """Draw a bar chart with each value printed above its bar.

    Parameters
    ----------
    name : sequence
        Category labels, one per bar (x axis).
    x : sequence of numbers
        Bar heights, in the same order as ``name``.
    ylabel : str
        Label of the y axis.
    title : str
        Figure title.
    ymax : float or None, default 45
        Upper limit of the y axis. Pass ``None`` to keep the lower limit at 0
        and leave the upper limit to matplotlib's autoscaling.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Plot Bars
    plt.figure(figsize=(10, 4), dpi=280)
    bars = plt.bar(name, x, color=['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'], alpha=0.4, width=.5)
    # Annotate from the arguments rather than from a notebook-level frame, so
    # the labels always match the bars that were actually drawn. Each label is
    # centred horizontally on its own bar.
    for bar, val in zip(bars, x):
        plt.text(bar.get_x() + bar.get_width() / 2, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

    # Decoration
    # Only restyle the tick labels plt.bar() created from `name`; replacing
    # them via set_xticklabels() without fixed tick positions makes matplotlib
    # warn and can attach labels to the wrong ticks.
    plt.xticks(rotation=90, horizontalalignment='right')
    plt.title(title, fontsize=14)
    plt.ylabel(ylabel)
    if ymax is None:
        plt.ylim(bottom=0)
    else:
        plt.ylim(0, ymax)
    plt.show()
In [10]:
# Render the manufacturer chart through the reusable helper
bar1(name=name, x=x, ylabel=ylabel, title=title)

https://yagisanatode.com/2019/08/06/google-apps-script-hexadecimal-color-codes-for-google-docs-sheets-and-slides-standart-palette/

colors = ['#274e13','#6aa84f','#93c47d', '#b6d7a8','#d9ead3','#b7b7b7','#38761d'] #green
colors = ['#0c343d','#134f5c','#45818e','#76a5af','#a2c4c9','#d0e0e3'] #cyan
colors = ['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc'] #yellow
colors = ['#4c1130','#a64d79','#c27ba0','#d5a6bd','#ead1dc','#741b47',] #magenta
colors = ['#e6b8af','#b6d7a8','#e06666','#747574','#ffd966','#ffcc99','#ea9999']
colors = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']
colors = ['#c27ba0','#d5a6bd','#ead1dc','#ffffff','#a64d79','#d9d2e9','#b4a7d6'] #purple
colors = ['#cfe2f3','#9fc5e8','#6fa8dc'] #blue
colors = ['#d9ead3','#b6d7a8','#93c47d','#6aa84f']
colors = ['#ff0000','#434343','#666666','#999999','#b7b7b7','#cccccc','#d9d9d9','#efefef','#ffffff','#f3f3f3'] #=> niemieckie czasopismo

Airports

In [11]:
# Load the airports table and preview its first three rows
df3 = pd.read_csv('c:/1/airports.csv')
df3.head(n=3)
Out[11]:
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords
0 6523 00A heliport Total Rf Heliport 40.070801 -74.933601 11.0 NaN US US-PA Bensalem no 00A NaN 00A NaN NaN NaN
1 323361 00AA small_airport Aero B Ranch Airport 38.704022 -101.473911 3435.0 NaN US US-KS Leoti no 00AA NaN 00AA NaN NaN NaN
2 6524 00AK small_airport Lowell Field 59.949200 -151.695999 450.0 NaN US US-AK Anchor Point no 00AK NaN 00AK NaN NaN NaN
In [12]:
# Count named airfields per type and order them from most to fewest.
# The ordering matters: the charts draw the rows in exactly this order.
# The trailing reset_index() renumbers the rows and keeps the pre-sort
# position in an 'index' column.
PPS = (
    df3
    .pivot_table(index='type', values='name', aggfunc='count')
    .reset_index()
    .rename(columns={'name': 'counts'})
    .sort_values('counts', ascending=False)
    .reset_index()
)
PPS
Out[12]:
index type counts
0 6 small_airport 33942
1 2 heliport 11248
2 4 medium_airport 4550
3 1 closed 3529
4 5 seaplane_base 1015
5 3 large_airport 624
6 0 balloonport 23
In [13]:
# Plot Bars: one bar per airfield type, drawn in the row order of PPS
plt.figure(figsize=(8, 4), dpi=280)
plt.bar(PPS['type'], PPS['counts'], color=['#cfe2f3','#9fc5e8','#6fa8dc'], alpha=0.4, width=.5)

# Numbers on the bars: each count is printed just above its bar, anchored at
# x = i, the position of the i-th bar on the categorical axis.
for i, val in enumerate(PPS['counts'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
# Rotate the tick labels that plt.bar() already created. Overwriting them with
# set_xticklabels() without fixing the tick positions first makes matplotlib
# warn and can attach labels to the wrong ticks.
plt.xticks(rotation=90, horizontalalignment='right')
plt.title("Number of airfields by the type", fontsize=14)
plt.ylabel('counts')
plt.ylim(0, 37000)
plt.show()
In [27]:
import matplotlib.pyplot as plt

# Stem colours for the lollipop chart
color = ['#7f6000','#bf9000','#f1c232','#ffd966','#ffe599','#fff2cc']

# Lollipop chart: a thin stem from 0 up to the count, topped with a dot,
# placed at the row positions of PPS
fig, ax = plt.subplots(figsize=(7, 4), dpi=280)
ax.vlines(x=PPS.index, ymin=0, ymax=PPS['counts'], color=color, alpha=0.7, linewidth=2)
ax.scatter(x=PPS.index, y=PPS['counts'], s=75, color='firebrick', alpha=0.7)

# Title, axis label and upper-cased type names as tick labels
ax.set_title('Number of airfields by the type', fontdict={'size':10})
ax.set_ylabel('counts')
ax.set_xticks(PPS.index)
ax.set_xticklabels(PPS['type'].str.upper(), rotation=90, fontdict={'horizontalalignment': 'right', 'size':8})

# Write every count slightly above its dot
for position, count in zip(PPS.index, PPS['counts']):
    ax.text(position, count + .5, s=round(count, 2), horizontalalignment='center', verticalalignment='bottom', fontsize=8)

plt.show()
In [28]:
## Draw plot
# `patches` is not used by the drawing code below.
import matplotlib.patches as patches

# Colours of the thick vertical lines that stand in for bars
color = ['#93c47d','#b6d7a8','#d9ead3','#d0e0e3','#a2c4c9','#76a5af']

fig, ax = plt.subplots(figsize=(16, 10), facecolor='white', dpi=180)
ax.vlines(x=PPS['type'], ymin=0, ymax=PPS['counts'], color=color, alpha=0.7, linewidth=20)

# Vertical count label just above the top of every line
for position, count in enumerate(PPS['counts']):
    ax.text(position, count + 0.5, round(count, 1), horizontalalignment='center', fontsize=18, rotation=90)

# Title, y-axis label and fixed y range
ax.set_title('Number of airfields by the type', fontdict={'size':22})
ax.set_ylabel('counts')
ax.set_ylim(0, 40000)
plt.show()

World Happiness Report

In [16]:
# Load the World Happiness Report table and preview its first three rows
df4 = pd.read_csv('c:/1/WorldHappinessReport.csv')
df4.head(n=3)
Out[16]:
Unnamed: 0 Country Region Happiness Rank Happiness Score Economy (GDP per Capita) Family Health (Life Expectancy) Freedom Trust (Government Corruption) Generosity Dystopia Residual Year
0 0 Afghanistan Southern Asia 153.0 3.575 0.31982 0.30285 0.30335 0.23414 0.09719 0.36510 1.95210 2015.0
1 1 Albania Central and Eastern Europe 95.0 4.959 0.87867 0.80434 0.81325 0.35733 0.06413 0.14272 1.89894 2015.0
2 2 Algeria Middle East and Northern Africa 68.0 5.605 0.93929 1.07772 0.61766 0.28579 0.17383 0.07822 2.43209 2015.0
In [17]:
# Mean happiness score per region, rounded to two decimals and ordered from
# lowest to highest. The ordering matters: the bar chart draws the rows in
# exactly this order. The trailing reset_index() renumbers the rows and keeps
# the pre-sort position in an 'index' column.
PKP = (
    df4
    .pivot_table(index='Region', values='Happiness Score', aggfunc='mean')
    .reset_index()
    .assign(**{'Happiness Score': lambda t: np.round(t['Happiness Score'], decimals=2)})
    .sort_values('Happiness Score')
    .reset_index()
)

PKP
Out[17]:
index Region Happiness Score
0 8 Sub-Saharan Africa 4.15
1 7 Southern Asia 4.59
2 6 Southeastern Asia 5.36
3 1 Central and Eastern Europe 5.37
4 4 Middle East and Northern Africa 5.39
5 2 Eastern Asia 5.64
6 3 Latin America and Caribbean 6.07
7 9 Western Europe 6.69
8 5 North America 7.23
9 0 Australia and New Zealand 7.30

colors = ['#d9ead3','#b6d7a8','#93c47d','#6aa84f']

In [29]:
# Plot Bars: one bar per region, drawn in the row order of PKP
plt.figure(figsize=(8, 4), dpi=280)
plt.bar(PKP['Region'], PKP['Happiness Score'], color=['#d9ead3','#b6d7a8','#93c47d','#6aa84f'], alpha=0.4, width=.9)

# Numbers on the bars: each mean score is printed just above its bar, anchored
# at x = i, the position of the i-th bar on the categorical axis.
for i, val in enumerate(PKP['Happiness Score'].values):
    plt.text(i, val, float(val), horizontalalignment='center', verticalalignment='bottom', fontdict={'fontweight':500, 'size':10})

# Decoration
# Rotate the tick labels that plt.bar() already created. Overwriting them with
# set_xticklabels() without fixing the tick positions first makes matplotlib
# warn and can attach labels to the wrong ticks.
plt.xticks(rotation=90, horizontalalignment='right')
plt.title("Average Happiness Rate", fontsize=14)
plt.ylabel('Rate')
plt.ylim(0, 9)
plt.show()