KEYWORDS : crosstab, groupby, value_counts, pivot table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import japanize_matplotlib
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
url = 'https://www.ces-alpha.org/course/file_serve/5706059750572032/dsa_tipdata.xlsx'
df_tips = pd.read_excel(url)
df_tips.head()
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# データサイズ確認
print(len(df_tips))
print(df_tips.shape)
244 (244, 7)
# データタイプ等の確認
df_tips.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 244 entries, 0 to 243 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 total_bill 244 non-null float64 1 tip 244 non-null float64 2 sex 244 non-null object 3 smoker 244 non-null object 4 day 244 non-null object 5 time 244 non-null object 6 size 244 non-null int64 dtypes: float64(2), int64(1), object(4) memory usage: 13.5+ KB
# 基本統計量の取得
df_tips.describe()
total_bill | tip | size | |
---|---|---|---|
count | 244.000000 | 244.000000 | 244.000000 |
mean | 19.785943 | 2.998279 | 2.569672 |
std | 8.902412 | 1.383638 | 0.951100 |
min | 3.070000 | 1.000000 | 1.000000 |
25% | 13.347500 | 2.000000 | 2.000000 |
50% | 17.795000 | 2.900000 | 2.000000 |
75% | 24.127500 | 3.562500 | 3.000000 |
max | 50.810000 | 10.000000 | 6.000000 |
# データの加工(tip割合)
df_tips['tip_pct'] = df_tips['tip'] / df_tips['total_bill']
df_tips
total_bill | tip | sex | smoker | day | time | size | tip_pct | |
---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 0.203927 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 0.073584 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 0.088222 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 0.098204 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 0.159744 |
244 rows × 8 columns
# 客単価
df_tips['bill_person'] = df_tips['total_bill'] / df_tips['size']
df_tips
total_bill | tip | sex | smoker | day | time | size | tip_pct | bill_person | |
---|---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 8.495000 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 3.446667 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 7.003333 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 11.840000 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 6.147500 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 0.203927 | 9.676667 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 0.073584 | 13.590000 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 0.088222 | 11.335000 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 0.098204 | 8.910000 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 0.159744 | 9.390000 |
244 rows × 9 columns
複数条件下でのデータ件数の集計
# 曜日(day)の別を縦、喫煙の有無を横に取った場合のデータ件数
pd.crosstab(df_tips.day, df_tips.smoker)
smoker | No | Yes |
---|---|---|
day | ||
Fri | 4 | 15 |
Sat | 45 | 42 |
Sun | 57 | 19 |
Thur | 45 | 17 |
# 曜日(day)の別 & 昼食/夕食の別を縦、喫煙の有無を横に取った場合のデータ件数
pd.crosstab([df_tips.day, df_tips.time], df_tips.smoker)
smoker | No | Yes | |
---|---|---|---|
day | time | ||
Fri | Dinner | 3 | 9 |
Lunch | 1 | 6 | |
Sat | Dinner | 45 | 42 |
Sun | Dinner | 57 | 19 |
Thur | Dinner | 1 | 0 |
Lunch | 44 | 17 |
# 小計を追加
# 曜日(day)の別 & 昼食/夕食の別を縦、喫煙の有無を横に取った場合のデータ件数
pd.crosstab([df_tips.day, df_tips.time], df_tips.smoker, margins=True)
smoker | No | Yes | All | |
---|---|---|---|---|
day | time | |||
Fri | Dinner | 3 | 9 | 12 |
Lunch | 1 | 6 | 7 | |
Sat | Dinner | 45 | 42 | 87 |
Sun | Dinner | 57 | 19 | 76 |
Thur | Dinner | 1 | 0 | 1 |
Lunch | 44 | 17 | 61 | |
All | 151 | 93 | 244 |
# 曜日(day)の別を縦、支払額が20 USD以上かどうかを横に取った場合のデータ件数
pd.crosstab(df_tips.day, df_tips.total_bill >=20)
total_bill | False | True |
---|---|---|
day | ||
Fri | 13 | 6 |
Sat | 49 | 38 |
Sun | 39 | 37 |
Thur | 46 | 16 |
# 性別で各データ項目の傾向を分析
df_tips.groupby('sex').mean()
total_bill | tip | size | tip_pct | bill_person | |
---|---|---|---|---|---|
sex | |||||
Female | 18.056897 | 2.833448 | 2.459770 | 0.166491 | 7.463918 |
Male | 20.744076 | 3.089618 | 2.630573 | 0.157651 | 8.123358 |
# 性別&喫煙
df_tips.groupby(['sex', 'smoker']).mean()
total_bill | tip | size | tip_pct | bill_person | ||
---|---|---|---|---|---|---|
sex | smoker | |||||
Female | No | 18.105185 | 2.773519 | 2.592593 | 0.156921 | 7.237685 |
Yes | 17.977879 | 2.931515 | 2.242424 | 0.182150 | 7.834116 | |
Male | No | 19.791237 | 3.113402 | 2.711340 | 0.160669 | 7.433395 |
Yes | 22.284500 | 3.051167 | 2.500000 | 0.152771 | 9.238797 |
# 特定項目の複数の統計量を取得(例:性別でチップ割合の平均と標準偏差)
# agg: aggregation (集計・集約)
grouped = df_tips.groupby(['sex'])
grouped_pct = grouped['tip_pct'].agg(['mean', 'std'])
grouped_pct
mean | std | |
---|---|---|
sex | ||
Female | 0.166491 | 0.053632 |
Male | 0.157651 | 0.064778 |
df_tips.groupby(['sex']).agg(['mean', 'std'])
total_bill | tip | size | tip_pct | bill_person | ||||||
---|---|---|---|---|---|---|---|---|---|---|
mean | std | mean | std | mean | std | mean | std | mean | std | |
sex | ||||||||||
Female | 18.056897 | 8.009209 | 2.833448 | 1.159495 | 2.459770 | 0.937644 | 0.166491 | 0.053632 | 7.463918 | 2.461519 |
Male | 20.744076 | 9.246469 | 3.089618 | 1.489102 | 2.630573 | 0.955997 | 0.157651 | 0.064778 | 8.123358 | 3.119912 |
df_tips['size'].value_counts()
2 156 3 38 4 37 5 5 1 4 6 4 Name: size, dtype: int64
df_tips
total_bill | tip | sex | smoker | day | time | size | tip_pct | bill_person | |
---|---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 8.495000 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 3.446667 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 7.003333 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 11.840000 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 6.147500 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
239 | 29.03 | 5.92 | Male | No | Sat | Dinner | 3 | 0.203927 | 9.676667 |
240 | 27.18 | 2.00 | Female | Yes | Sat | Dinner | 2 | 0.073584 | 13.590000 |
241 | 22.67 | 2.00 | Male | Yes | Sat | Dinner | 2 | 0.088222 | 11.335000 |
242 | 17.82 | 1.75 | Male | No | Sat | Dinner | 2 | 0.098204 | 8.910000 |
243 | 18.78 | 3.00 | Female | No | Thur | Dinner | 2 | 0.159744 | 9.390000 |
244 rows × 9 columns
df_tips['size'].unique()
array([2, 3, 4, 1, 6, 5], dtype=int64)
df_tips['day'].unique()
array(['Sun', 'Sat', 'Thur', 'Fri'], dtype=object)
# 縦に性別属性、横に曜日属性
# これら全ての組み合わせについてtip_pctの値を調べ、
# これらの値から得られる関数値(平均、和など)を表示。
pd.pivot_table(df_tips, values='tip_pct', index='sex', columns='day', aggfunc=np.mean)
day | Fri | Sat | Sun | Thur |
---|---|---|---|---|
sex | ||||
Female | 0.199388 | 0.156470 | 0.181569 | 0.157525 |
Male | 0.143385 | 0.151577 | 0.162344 | 0.165276 |
# 縦に性別属性、横に曜日属性
# これら全ての組み合わせについてtip_pctの値を調べ、
# これらの値から得られる関数値(平均、和など)を表示。
pd.pivot_table(df_tips, values='tip_pct', index=['sex', 'smoker'], columns='day', aggfunc=np.mean)
day | Fri | Sat | Sun | Thur | |
---|---|---|---|---|---|
sex | smoker | ||||
Female | No | 0.165296 | 0.147993 | 0.165710 | 0.155971 |
Yes | 0.209129 | 0.163817 | 0.237075 | 0.163073 | |
Male | No | 0.138005 | 0.162132 | 0.158291 | 0.165706 |
Yes | 0.144730 | 0.139067 | 0.173964 | 0.164417 |
# 縦に性別属性、横に曜日属性
# これら全ての組み合わせについて客単価の値を調べ、
# これらの値から得られる関数値(平均、和など)を表示。
pd.pivot_table(df_tips, values='bill_person', index=['sex', 'smoker'], columns='day', aggfunc=np.mean)
day | Fri | Sat | Sun | Thur | |
---|---|---|---|---|---|
sex | smoker | ||||
Female | No | 8.350833 | 8.487628 | 6.811667 | 6.737233 |
Yes | 6.327143 | 8.928111 | 6.642917 | 7.677500 | |
Male | No | 8.737500 | 7.450651 | 7.317957 | 7.523567 |
Yes | 9.507188 | 8.503981 | 10.737244 | 8.760417 |
# データサイズ確認
print(len(df_tips))
print(df_tips.shape)
244 (244, 9)
# データタイプ等の確認
df_tips.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 244 entries, 0 to 243 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 total_bill 244 non-null float64 1 tip 244 non-null float64 2 sex 244 non-null object 3 smoker 244 non-null object 4 day 244 non-null object 5 time 244 non-null object 6 size 244 non-null int64 7 tip_pct 244 non-null float64 8 bill_person 244 non-null float64 dtypes: float64(4), int64(1), object(4) memory usage: 17.3+ KB
# 基本統計量の確認
df_tips.describe()
total_bill | tip | size | tip_pct | bill_person | |
---|---|---|---|---|---|
count | 244.000000 | 244.000000 | 244.000000 | 244.000000 | 244.00000 |
mean | 19.785943 | 2.998279 | 2.569672 | 0.160803 | 7.88823 |
std | 8.902412 | 1.383638 | 0.951100 | 0.061072 | 2.91435 |
min | 3.070000 | 1.000000 | 1.000000 | 0.035638 | 2.87500 |
25% | 13.347500 | 2.000000 | 2.000000 | 0.129127 | 5.80250 |
50% | 17.795000 | 2.900000 | 2.000000 | 0.154770 | 7.25500 |
75% | 24.127500 | 3.562500 | 3.000000 | 0.191475 | 9.39000 |
max | 50.810000 | 10.000000 | 6.000000 | 0.710345 | 20.27500 |
# day列に含まれるデータを表示
df_tips['day'].unique()
array(['Sun', 'Sat', 'Thur', 'Fri'], dtype=object)
# time列に含まれるデータを表示
df_tips['time'].unique()
array(['Dinner', 'Lunch'], dtype=object)
# size列に含まれるデータを表示
df_tips['size'].unique()
array([2, 3, 4, 1, 6, 5], dtype=int64)
# データの加工(演算/追加)
df_tips['tip_pct'] = df_tips['tip'] / df_tips['total_bill']
df_tips.head()
total_bill | tip | sex | smoker | day | time | size | tip_pct | bill_person | |
---|---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 8.495000 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 3.446667 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 7.003333 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 11.840000 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 6.147500 |
# データの加工(演算/追加)
df_tips['bill_person'] = df_tips['total_bill'] / df_tips['size']
df_tips.head()
total_bill | tip | sex | smoker | day | time | size | tip_pct | bill_person | |
---|---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | 0.059447 | 8.495000 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | 0.160542 | 3.446667 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 | 0.166587 | 7.003333 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 | 0.139780 | 11.840000 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 | 0.146808 | 6.147500 |
# 曜日(day)の別を縦、喫煙の有無を横に取った場合のデータ件数
pd.crosstab(df_tips.day, df_tips.smoker)
smoker | No | Yes |
---|---|---|
day | ||
Fri | 4 | 15 |
Sat | 45 | 42 |
Sun | 57 | 19 |
Thur | 45 | 17 |
# 昼食/夕食、曜日を縦、喫煙の有無を横に取った場合の各データ件数
pd.crosstab([df_tips.time, df_tips.day], df_tips.smoker)
smoker | No | Yes | |
---|---|---|---|
time | day | ||
Dinner | Fri | 3 | 9 |
Sat | 45 | 42 | |
Sun | 57 | 19 | |
Thur | 1 | 0 | |
Lunch | Fri | 1 | 6 |
Thur | 44 | 17 |
# 上に小計を追加
pd.crosstab([df_tips.time, df_tips.day], df_tips.smoker, margins=True)
smoker | No | Yes | All | |
---|---|---|---|---|
time | day | |||
Dinner | Fri | 3 | 9 | 12 |
Sat | 45 | 42 | 87 | |
Sun | 57 | 19 | 76 | |
Thur | 1 | 0 | 1 | |
Lunch | Fri | 1 | 6 | 7 |
Thur | 44 | 17 | 61 | |
All | 151 | 93 | 244 |
# 曜日(day)の別を縦、支払額が20 USD以上かどうかを横に取った場合のデータ件数
pd.crosstab(df_tips.day, df_tips.total_bill>=20, margins=True)
total_bill | False | True | All |
---|---|---|---|
day | |||
Fri | 13 | 6 | 19 |
Sat | 49 | 38 | 87 |
Sun | 39 | 37 | 76 |
Thur | 46 | 16 | 62 |
All | 147 | 97 | 244 |
# 性別で各データ項目の傾向を比較
df_tips.groupby('sex').mean()
total_bill | tip | size | tip_pct | bill_person | |
---|---|---|---|---|---|
sex | |||||
Female | 18.056897 | 2.833448 | 2.459770 | 0.166491 | 7.463918 |
Male | 20.744076 | 3.089618 | 2.630573 | 0.157651 | 8.123358 |
# 性別と喫煙/禁煙で各データ項目の傾向を比較
df_tips.groupby(['sex', 'smoker']).mean()
total_bill | tip | size | tip_pct | bill_person | ||
---|---|---|---|---|---|---|
sex | smoker | |||||
Female | No | 18.105185 | 2.773519 | 2.592593 | 0.156921 | 7.237685 |
Yes | 17.977879 | 2.931515 | 2.242424 | 0.182150 | 7.834116 | |
Male | No | 19.791237 | 3.113402 | 2.711340 | 0.160669 | 7.433395 |
Yes | 22.284500 | 3.051167 | 2.500000 | 0.152771 | 9.238797 |
# 特定項目の複数統計量を調査:【例】性別でチップ割合(tip_pct)の平均と標準偏差の傾向を調べる
# agg: aggregation(データ集計・集約)
grouped = df_tips.groupby(['sex']) # 着目する属性指定
grouped_pct = grouped['tip_pct'].agg(['mean', 'std']) # 各属性で調べたい項目と統計量を指定
grouped_pct
mean | std | |
---|---|---|
sex | ||
Female | 0.166491 | 0.053632 |
Male | 0.157651 | 0.064778 |
# 特定項目の複数統計量を調査:【例】性別・喫煙/禁煙でチップ割合(tip_pct)の平均と標準偏差の傾向を調べる
grouped = df_tips.groupby(['sex', 'smoker']) # 着目する属性指定
grouped_pct = grouped['tip_pct'].agg(['mean', 'std']) # 各属性で調べたい項目と統計量を指定
grouped_pct
mean | std | ||
---|---|---|---|
sex | smoker | ||
Female | No | 0.156921 | 0.036421 |
Yes | 0.182150 | 0.071595 | |
Male | No | 0.160669 | 0.041849 |
Yes | 0.152771 | 0.090588 |
# 特定項目の複数統計量を調査:【例】曜日で支払総額(total_bill)の平均と標準偏差の傾向を調べる
grouped = df_tips.groupby(['day']) # 着目する属性指定
grouped_total = grouped['total_bill'].agg(['mean', 'std']) # 各属性で調べたい項目と統計量を指定
grouped_total
mean | std | |
---|---|---|
day | ||
Fri | 17.151579 | 8.302660 |
Sat | 20.441379 | 9.480419 |
Sun | 21.410000 | 8.832122 |
Thur | 17.682742 | 7.886170 |
# sizeの値毎の最小値と平均値、標準偏差を出力
#df_tips.groupby('size').agg(['min', 'mean', 'std'])
# sizeの値毎の、tip_pctについては最大最小値を、tipについては和を出力
df_tips.groupby('size').agg({'tip_pct': ['min', 'max'], 'tip': 'sum'})
tip_pct | tip | ||
---|---|---|---|
min | max | sum | |
size | |||
1 | 0.137931 | 0.325733 | 5.75 |
2 | 0.035638 | 0.710345 | 402.84 |
3 | 0.056433 | 0.230742 | 128.94 |
4 | 0.077459 | 0.280535 | 153.01 |
5 | 0.065660 | 0.241663 | 20.14 |
6 | 0.103799 | 0.195335 | 20.90 |
df_tips['size'].value_counts(sort=False)
2 156 3 38 4 37 1 4 6 4 5 5 Name: size, dtype: int64
# 出現回数を上のdataframeに追加
tmp = df_tips.groupby('size').agg({'tip_pct': ['min', 'max'], 'tip': 'sum'})
tmp
tip_pct | tip | ||
---|---|---|---|
min | max | sum | |
size | |||
1 | 0.137931 | 0.325733 | 5.75 |
2 | 0.035638 | 0.710345 | 402.84 |
3 | 0.056433 | 0.230742 | 128.94 |
4 | 0.077459 | 0.280535 | 153.01 |
5 | 0.065660 | 0.241663 | 20.14 |
6 | 0.103799 | 0.195335 | 20.90 |
tmp['count'] = df_tips['size'].value_counts()
tmp
tip_pct | tip | count | ||
---|---|---|---|---|
min | max | sum | ||
size | ||||
1 | 0.137931 | 0.325733 | 5.75 | 4 |
2 | 0.035638 | 0.710345 | 402.84 | 156 |
3 | 0.056433 | 0.230742 | 128.94 | 38 |
4 | 0.077459 | 0.280535 | 153.01 | 37 |
5 | 0.065660 | 0.241663 | 20.14 | 5 |
6 | 0.103799 | 0.195335 | 20.90 | 4 |
# 縦には'sex'属性、横には'day'属性に着目。
# これらの全ての組み合わせについて、
# tip_pctの値を調べ、これらの値から得られる関数値(和や平均値など)を表示する。
pd.pivot_table(df_tips, values='tip_pct', index=['sex'],
columns=['day'], aggfunc=np.mean)
day | Fri | Sat | Sun | Thur |
---|---|---|---|---|
sex | ||||
Female | 0.199388 | 0.156470 | 0.181569 | 0.157525 |
Male | 0.143385 | 0.151577 | 0.162344 | 0.165276 |
# 縦には'size'属性、横には'day'属性に着目。
# これらの全ての組み合わせについて、
# total_billの値を調べ、これらの値から得られる関数値(和や平均値など)を表示する。
pd.pivot_table(df_tips, values='total_bill', index=['size'],
columns=['day'], aggfunc=np.mean)
day | Fri | Sat | Sun | Thur |
---|---|---|---|---|
size | ||||
1 | 8.580000 | 5.160000 | NaN | 10.070000 |
2 | 16.321875 | 16.837170 | 17.560000 | 15.156875 |
3 | 15.980000 | 25.509444 | 22.184000 | 19.160000 |
4 | 40.170000 | 29.876154 | 26.688333 | 29.950000 |
5 | NaN | 28.150000 | 27.000000 | 41.190000 |
6 | NaN | NaN | 48.170000 | 30.383333 |
# 縦には'sex' & 'smoker' 属性、横には'day'属性に着目。
# これらの全ての組み合わせについて、
# tip_pctの値を調べ、これらの値から得られる関数値(和や平均値など)を表示する。
pd.pivot_table(df_tips, values='tip_pct', index=['sex', 'smoker'],
columns=['day'], aggfunc=np.mean)
day | Fri | Sat | Sun | Thur | |
---|---|---|---|---|---|
sex | smoker | ||||
Female | No | 0.165296 | 0.147993 | 0.165710 | 0.155971 |
Yes | 0.209129 | 0.163817 | 0.237075 | 0.163073 | |
Male | No | 0.138005 | 0.162132 | 0.158291 | 0.165706 |
Yes | 0.144730 | 0.139067 | 0.173964 | 0.164417 |
# 縦には'size' 属性、横には'day'属性に着目。
# これらの全ての組み合わせについて、
# total_billの値を調べ、これらの値から得られる関数値(和や平均値など)を表示する。
pd.pivot_table(df_tips, values='total_bill', index=['size'],
columns=['day'], aggfunc=np.mean)
day | Fri | Sat | Sun | Thur |
---|---|---|---|---|
size | ||||
1 | 8.580000 | 5.160000 | NaN | 10.070000 |
2 | 16.321875 | 16.837170 | 17.560000 | 15.156875 |
3 | 15.980000 | 25.509444 | 22.184000 | 19.160000 |
4 | 40.170000 | 29.876154 | 26.688333 | 29.950000 |
5 | NaN | 28.150000 | 27.000000 | 41.190000 |
6 | NaN | NaN | 48.170000 | 30.383333 |
# 客単価
pd.pivot_table(df_tips, values='bill_person', index=['size'],
columns=['day'], aggfunc=np.mean)
day | Fri | Sat | Sun | Thur |
---|---|---|---|---|
size | ||||
1 | 8.580000 | 5.160000 | NaN | 10.070000 |
2 | 8.160937 | 8.418585 | 8.780000 | 7.578437 |
3 | 5.326667 | 8.503148 | 7.394667 | 6.386667 |
4 | 10.042500 | 7.469038 | 6.672083 | 7.487500 |
5 | NaN | 5.630000 | 5.400000 | 8.238000 |
6 | NaN | NaN | 8.028333 | 5.063889 |