import pandas as pd
import numpy as np

df = pd.DataFrame({'A' : ['foo', 'foo', 'bar', 'foo', 'bar', 'foo'],
                   'B' : ['one', 'two', 'three','two', 'two', 'one'],
                   'C' : np.random.randint(5, size=6),
                   'D' : np.random.randint(5, size=6),
                   'E' : np.random.randint(5, size=6)})
print (df)

     A      B  C  D  E
0  foo    one  4  4  1
1  foo    two  3  1  4
2  bar  three  3  3  4
3  foo    two  3  1  4
4  bar    two  1  0  0
5  foo    one  4  2  1

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  C  D
0  bar  three  3  3
1  bar    two  1  0
2  foo    one  8  6
3  foo    two  6  2

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

df1 = df.groupby(['A', 'B'], as_index=False)['C'].sum()
print (df1)

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  C  D
0  bar  three  3  3
1  bar    two  1  0
2  foo    one  8  6
3  foo    two  6  2

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

df2 = df.groupby(['A', 'B'], as_index=False).sum()
print (df2)

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  C  D
0  bar  three  3  3
1  bar    two  1  0
2  foo    one  8  6
3  foo    two  6  2

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

df3 = df.groupby(['A', 'B'], as_index=False)['C','D'].sum()
print (df3)

     A      B  C  D
0  bar  three  3  3
1  bar    two  1  0
2  foo    one  8  6
3  foo    two  6  2

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

df1 = df.groupby(['A', 'B'], as_index=False)['C'].agg('sum')
print (df1)

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

df2 = df.groupby(['A', 'B'], as_index=False).agg('sum')
print (df2)

     A      B  C  D  E
0  bar  three  3  3  4
1  bar    two  1  0  0
2  foo    one  8  6  2
3  foo    two  6  2  8

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

df4 = (df.groupby(['A', 'B'])['C'].agg([('average','mean'),('total','sum')]).reset_index())
print (df4)

     A      B  average  total
0  bar  three        3      3
1  bar    two        1      1
2  foo    one        4      8
3  foo    two        3      6

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

df5 = (df.groupby(['A', 'B']).agg([('average','mean'),('total','sum')]))

print (df5)

                C             D             E      
          average total average total average total
A   B                                              
bar three       3     3       3     3       4     4
    two         1     1       0     0       0     0
foo one         4     8       3     6       1     2
    two         3     6       1     2       4     8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

# łączenie w jedną GŁOWĘ TABELI
df5.columns = df5.columns.map('_'.join)
df5 = df5.reset_index()
print (df5)

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

ZMIANA NAZWY KOLUMN

df5 = df.groupby(['A', 'B']).agg(['mean','sum'])

df5.columns = (df5.columns.map('_'.join)
               .str.replace('sum','total')
               .str.replace('mean','average'))

df5 = df5.reset_index()
print (df5)

     A      B  C_average  C_total  D_average  D_total  E_average  E_total
0  bar  three          3        3          3        3          4        4
1  bar    two          1        1          0        0          0        0
2  foo    one          4        8          3        6          1        2
3  foo    two          3        6          1        2          4        8

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

ZMIANA NAZWY KOLUMN BEZPOŚREDNIO

df6 = (df.groupby(['A', 'B'], as_index=False)
         .agg({'C':'sum','D':'mean'})
         .rename(columns={'C':'C_total', 'D':'D_average'}))
print (df6)

     A      B  C_total  D_average
0  bar  three        3          3
1  bar    two        1          0
2  foo    one        8          3
3  foo    two        6          1

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

TEGO NIE ROZUMIEM

def func(x):
    return x.iat[0] + x.iat[-1]

df7 = (df.groupby(['A', 'B'], as_index=False)
         .agg({'C':'sum','D': func})
         .rename(columns={'C':'C_total', 'D':'D_sum_first_and_last'}))
print (df7)

     A      B  C_total  D_sum_first_and_last
0  bar  three        3                     6
1  bar    two        1                     0
2  foo    one        8                     6
3  foo    two        6                     2

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

Table of Contents

No DataFrame after aggregation! What happened?

df1 = df.groupby(['A', 'B'])['C'].sum()
print (df1)

A    B    
bar  three    3
     two      1
foo  one      8
     two      6
Name: C, dtype: int32

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

# Z DODATKOWYM INDEKSEM
df1 = df.groupby(['A', 'B'], as_index=False)['C'].sum()
print (df1)

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

# TO SAMO RESET INDEX
df1 = df.groupby(['A', 'B'])['C'].sum().reset_index()
print (df1)

     A      B  C
0  bar  three  3
1  bar    two  1
2  foo    one  8
3  foo    two  6

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

df2 = df.groupby('A')['C'].sum()
print (df2)

A
bar     4
foo    14
Name: C, dtype: int32

Index(['bar', 'foo'], dtype='object', name='A')

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

df2.index

Index(['bar', 'foo'], dtype='object', name='A')

print (type(df2))

<class 'pandas.core.series.Series'>

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

df2 = df.groupby('A', as_index=False)['C'].sum()
print (df2)

     A   C
0  bar   4
1  foo  14

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

df2 = df.groupby('A')['C'].sum().reset_index()
print (df2)

     A   C
0  bar   4
1  foo  14

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

How to aggregate mainly strings columns (to lists, tuples, strings with separator)?

df = pd.DataFrame({'A' : ['a', 'c', 'b', 'b', 'a', 'c', 'b'],
                   'B' : ['one', 'two', 'three','two', 'two', 'one', 'three'],
                   'C' : ['three', 'one', 'two', 'two', 'three','two', 'one'],
                   'D' : [1,2,3,2,3,1,2]})
print (df)

   A      B      C  D
0  a    one  three  1
1  c    two    one  2
2  b  three    two  3
3  b    two    two  2
4  a    two  three  3
5  c    one    two  1
6  b  three    one  2

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

WYŚWIETLANIE ZBIORU WYRAZÓW

df1 = df.groupby('A')['B'].agg(list).reset_index()
print (df1)

   A                    B
0  a           [one, two]
1  b  [three, two, three]
2  c           [two, one]

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

TO SAMO ALE BEZ NAWIASÓW KWADRATOWYCH

df2 = df.groupby('A')['B'].agg(','.join).reset_index()
print (df2)

   A                B
0  a          one,two
1  b  three,two,three
2  c          two,one

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

PRZEKSZTAŁCENIE WYRAZÓW NA CYFRY

df3 = (df.groupby('A')['D']
         .agg(lambda x: ','.join(x.astype(str)))
         .reset_index())
print (df3)

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

TO SAMO INNYM SPOSOBEM

df3 = (df.assign(D = df['D'].astype(str))
         .groupby('A')['D']
         .agg(','.join).reset_index())
print (df3)

   A      D
0  a    1,3
1  b  3,2,2
2  c    2,1

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

df

df4 = df.groupby('A').agg(','.join).reset_index()
print (df4)

   A                B            C
0  a          one,two  three,three
1  b  three,two,three  two,two,one
2  c          two,one      one,two

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

df5 = (df.groupby('A')
         .agg(lambda x: ','.join(x.astype(str)))
         .reset_index())
print (df5)

   A                B            C      D
0  a          one,two  three,three    1,3
1  b  three,two,three  two,two,one  3,2,2
2  c          two,one      one,two    2,1

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

How to aggregate counts?

df = pd.DataFrame({'A' : ['a', 'c', 'b', 'b', 'a', 'c', 'b'],
                   'B' : ['one', 'two', 'three','two', 'two', 'one', 'three'],
                   'C' : ['three', np.nan, np.nan, 'two', 'three','two', 'one'],
                   'D' : [np.nan,2,3,2,3,np.nan,2]})
print (df)

   A      B      C    D
0  a    one  three  NaN
1  c    two    NaN  2.0
2  b  three    NaN  3.0
3  b    two    two  2.0
4  a    two  three  3.0
5  c    one    two  NaN
6  b  three    one  2.0

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

Pokazuje ilości występowania – ile razy wystąpiło

df1 = df.groupby('A').size().reset_index(name='COUNT')
print (df1)

   A  COUNT
0  a      2
1  b      3
2  c      2

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

ILE RAZY BYŁO DLA A BEZ PUSTYCH KOMÓREK

df2 = df.groupby('A')['C'].count().reset_index(name='COUNT')
print (df2)

   A  COUNT
0  a      2
1  b      2
2  c      1

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

df

Dla każdej kolumny ile razy było a, b i c

Tylko dla wartości bez pustych komórek

df3 = df.groupby('A').count().add_suffix('_COUNT').reset_index()
print (df3)

   A  B_COUNT  C_COUNT  D_COUNT
0  a        2        2        1
1  b        3        2        3
2  c        2        1        1

   A  COUNT
0  b      3
1  a      2
2  c      2

df

df4 = (df['A'].value_counts()
              .rename_axis('A')
              .reset_index(name='COUNT'))
print (df4)

   A  COUNT
0  b      3
1  a      2
2  c      2

THE DATA SCIENCE LIBRARY

Wojciech Moszczyński

Formuła Pandas: groupby part 2

No DataFrame after aggregation! What happened?

How to aggregate mainly strings columns (to lists, tuples, strings with separator)?

How to aggregate counts?

	A	B	C	D
0	a	one	three	1
1	c	two	one	2
2	b	three	two	3
3	b	two	two	2
4	a	two	three	3
5	c	one	two	1
6	b	three	one	2