# 数値演算、描画用ライブラリ読み込み
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# グラフをinline表示可能にする
%matplotlib inline
# 解像度を上げてinline表示する
%config InlineBackend.figure_format = 'retina'
# 日本語表示用ライブラリ(matplotlibで日本語を使用したい場合)
import japanize_matplotlib
# Sample data from the data-science fundamentals textbook (p. 5):
# 100 weight measurements, listed in ascending order, ten values per row.
weight = np.array(
    [
        43.6, 45.2, 45.4, 45.8, 47.2, 47.8, 48.2, 48.7, 48.8, 48.9,
        49.0, 49.0, 49.4, 49.5, 49.8, 50.4, 50.5, 50.9, 50.9, 51.2,
        51.2, 51.2, 51.3, 51.3, 51.6, 51.7, 51.7, 51.8, 52.0, 52.0,
        52.1, 52.1, 52.1, 52.2, 52.3, 52.7, 52.7, 52.8, 52.9, 52.9,
        53.1, 53.1, 53.8, 54.0, 54.5, 54.5, 54.6, 54.7, 54.7, 54.7,
        54.8, 54.9, 55.1, 55.1, 55.2, 55.3, 55.4, 55.4, 55.4, 55.6,
        55.7, 55.8, 55.9, 56.1, 56.3, 56.3, 56.3, 56.4, 56.5, 56.7,
        56.8, 57.0, 57.1, 57.1, 57.2, 57.3, 57.6, 57.7, 57.8, 58.1,
        58.4, 58.6, 58.7, 58.7, 58.7, 58.7, 59.1, 59.3, 59.9, 60.0,
        60.1, 60.3, 60.5, 60.6, 60.6, 60.7, 61.3, 62.7, 64.2, 64.6,
    ],
    dtype=float,
)
# Echo the array so the notebook displays it
weight
array([43.6, 45.2, 45.4, 45.8, 47.2, 47.8, 48.2, 48.7, 48.8, 48.9, 49. , 49. , 49.4, 49.5, 49.8, 50.4, 50.5, 50.9, 50.9, 51.2, 51.2, 51.2, 51.3, 51.3, 51.6, 51.7, 51.7, 51.8, 52. , 52. , 52.1, 52.1, 52.1, 52.2, 52.3, 52.7, 52.7, 52.8, 52.9, 52.9, 53.1, 53.1, 53.8, 54. , 54.5, 54.5, 54.6, 54.7, 54.7, 54.7, 54.8, 54.9, 55.1, 55.1, 55.2, 55.3, 55.4, 55.4, 55.4, 55.6, 55.7, 55.8, 55.9, 56.1, 56.3, 56.3, 56.3, 56.4, 56.5, 56.7, 56.8, 57. , 57.1, 57.1, 57.2, 57.3, 57.6, 57.7, 57.8, 58.1, 58.4, 58.6, 58.7, 58.7, 58.7, 58.7, 59.1, 59.3, 59.9, 60. , 60.1, 60.3, 60.5, 60.6, 60.6, 60.7, 61.3, 62.7, 64.2, 64.6])
# Number of observations in the (one-dimensional) sample
weight.shape[0]
100
# Histogram with matplotlib's default binning, bars outlined in black.
# There is deliberately no trailing semicolon: the notebook echoes the
# returned (counts, bin edges, bar container) tuple.
plt.hist(weight, edgecolor='k')
(array([ 3., 2., 10., 13., 16., 20., 16., 12., 5., 3.]), array([43.6, 45.7, 47.8, 49.9, 52. , 54.1, 56.2, 58.3, 60.4, 62.5, 64.6]), <BarContainer object of 10 artists>)
# Class boundaries 43, 45, ..., 65; np.arange excludes the stop value 66.
bins = np.arange(start=43, stop=66, step=2)
# Echo the boundaries so the notebook displays them
bins
array([43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65])
# Same histogram, now using the explicit class boundaries in `bins`.
# The trailing semicolon suppresses the echoed return value.
plt.hist(weight, bins=bins, edgecolor='k');
# Histogram parameters: first class edge, last class edge, step between edges
hist_ini, hist_fin, hist_step = 43, 65, 2
# np.arange excludes its stop value, hence hist_fin + 1 to keep the last edge
bins = np.arange(hist_ini, hist_fin + 1, hist_step)
# np.histogram returns (counts per class, bin edges); only the counts are kept
dosu = np.histogram(weight, bins=bins)[0]
# Running total of the class counts (cumulative frequency)
cum_dosu = dosu.cumsum()
print(bins)      # class boundaries
print(dosu)      # frequency per class
print(cum_dosu)  # cumulative frequency
[43 45 47 49 51 53 55 57 59 61 63 65] [ 1 3 6 9 21 12 19 15 10 2 2] [ 1 4 10 19 40 52 71 86 96 98 100]
# Bar-chart rendering of the frequency table (dosu) computed above.
# x_range holds the left edge of every class; hist_fin is appended for the
# tick list so the right edge of the last class is labelled as well.
x_range = np.arange(hist_ini, hist_fin, hist_step)
plt.xticks(np.append(x_range, hist_fin))
plt.yticks(np.arange(0, 21, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# align='edge' anchors each bar at its class's left edge. The width is tied
# to hist_step (instead of a literal 2) so the bars keep filling their class
# when the step is changed.
plt.bar(x_range, dosu, align='edge', color='y', ec="black", width=hist_step);
# Same bar chart, with the count printed above every bar.
x_range = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
plt.xticks(np.append(x_range, hist_fin))            # plus the final right edge
plt.yticks(np.arange(0, 26, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# Extra head-room above the tallest bar so its label is not clipped
plt.ylim(0, 26)
# Keep the returned bar container: plt.bar_label needs it to place the labels.
# The width follows hist_step (instead of a literal 2) so bars and class
# edges stay in sync.
p = plt.bar(x_range, dosu, align='edge', color='y', ec="black", width=hist_step)
plt.bar_label(p, label_type='edge', color='k', fontsize=12);
# One colour code per class, all yellow to start with
color1 = ['y' for _ in dosu]
color1
['y', 'y', 'y', 'y', 'y', 'y', 'y', 'y', 'y', 'y', 'y']
# Recolour the fifth class (index 4) magenta so it stands out in the chart
color1[4]='m'
color1
['y', 'y', 'y', 'y', 'm', 'y', 'y', 'y', 'y', 'y', 'y']
# Bar chart using the per-bar colour list color1, plus dotted guide lines.
x_range = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
plt.xticks(np.append(x_range, hist_fin))            # plus the final right edge
plt.yticks(np.arange(0, 21, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# Dotted guide lines: two horizontal (y=7, y=17) and one vertical (x=50)
plt.axhline(y=7, c='k', ls=':')
plt.axhline(y=17, c='k', ls=':')
plt.axvline(x=50, c='k', ls=':')
# The width follows hist_step (instead of a literal 2) so bars and class
# edges stay in sync.
plt.bar(x_range, dosu, align='edge', color=color1, ec="black", width=hist_step);
# Colour per class: cyan where the count is at least 17, yellow otherwise.
# np.where vectorises the choice; tolist() turns it back into a plain list.
cond1 = np.where(dosu >= 17, 'c', 'y').tolist()
cond1
['y', 'y', 'y', 'y', 'c', 'y', 'c', 'y', 'y', 'y', 'y']
# Unconditional variant: every class gets cyan
cond1 = ['c'] * len(dosu)
cond1
['c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c']
# Bar chart that highlights the classes reaching the y=17 guide line.
# The colours are rebuilt here rather than taken from whatever cond1 holds:
# cond1 is reassigned to an all-cyan list earlier in the notebook, and with
# that list no bar would stand out against the guide line.
cond1 = ['c' if count >= 17 else 'y' for count in dosu]
x_range = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
plt.xticks(np.append(x_range, hist_fin))            # plus the final right edge
plt.yticks(np.arange(0, 21, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# Horizontal guide line at the highlighting threshold
plt.axhline(y=17, c='k', ls=':')
# The width follows hist_step (instead of a literal 2) so bars and class
# edges stay in sync.
plt.bar(x_range, dosu, align='edge', color=cond1, ec="black", width=hist_step);
# Colour per class: cyan where 4 <= count < 17, yellow otherwise.
# The chained comparison is spelled as two element-wise tests joined with &.
cond2 = np.where((dosu >= 4) & (dosu < 17), 'c', 'y').tolist()
cond2
['y', 'y', 'c', 'c', 'y', 'c', 'y', 'c', 'c', 'y', 'y']
# Bar chart coloured by cond2, with guide lines at both ends of the band
# (4 and 17) that cond2 was built from.
x_range = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
plt.xticks(np.append(x_range, hist_fin))            # plus the final right edge
plt.yticks(np.arange(0, 21, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# Horizontal guide lines
plt.axhline(y=4, c='k', ls=':')
plt.axhline(y=17, c='k', ls=':')
# The width follows hist_step (instead of a literal 2) so bars and class
# edges stay in sync.
plt.bar(x_range, dosu, align='edge', color=cond2, ec="black", width=hist_step);
# Bar chart coloured by cond2, overlaid with the cumulative-frequency line
# on a secondary y axis.
x_range = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
plt.xticks(np.append(x_range, hist_fin))            # plus the final right edge
plt.yticks(np.arange(0, 21, 5))
plt.title("〇〇クラス男子の体重分布", fontsize=18)
plt.xlabel("体重 [kg]", fontsize=14)
plt.ylabel("生徒数", fontsize=14)
# Horizontal guide lines
plt.axhline(y=4, c='k', ls=':')
plt.axhline(y=17, c='k', ls=':')
# The width follows hist_step (instead of a literal 2) so bars and class
# edges stay in sync.
plt.bar(x_range, dosu, align='edge', color=cond2, ec="black", width=hist_step)
# Same left edges as x_range; the name my_ticks is kept because it is
# assigned at module level in the original cell.
my_ticks = x_range
# Second y axis sharing the x axis; the ylim/ylabel calls below act on it
plt.twinx()
plt.ylim(0, 110)
plt.ylabel("累積度数", fontsize=14)
# Cumulative counts drawn at each class midpoint (left edge + half a step)
plt.plot(my_ticks + hist_step/2, cum_dosu, 'mD-');
!pip install seaborn
WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: Ignoring invalid distribution -apanize-matplotlib (c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages) WARNING: You are using pip version 22.0.4; however, version 22.1 is available. You should consider upgrading via the 'C:\Users\kumanolab01\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.
Requirement already satisfied: seaborn in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (0.11.2) Requirement already satisfied: numpy>=1.15 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from seaborn) (1.21.5) Requirement already satisfied: scipy>=1.0 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from seaborn) (1.7.3) Requirement already satisfied: matplotlib>=2.2 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from seaborn) (3.5.1) Requirement already satisfied: pandas>=0.23 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from seaborn) (1.3.5) Requirement already satisfied: packaging>=20.0 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (21.3) Requirement already satisfied: cycler>=0.10 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (0.11.0) Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (1.3.2) Requirement already satisfied: pillow>=6.2.0 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (8.4.0) Requirement already satisfied: pyparsing>=2.2.1 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (3.0.6) Requirement already satisfied: fonttools>=4.22.0 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (4.28.5) Requirement already satisfied: python-dateutil>=2.7 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from matplotlib>=2.2->seaborn) (2.8.2) Requirement already satisfied: pytz>=2017.3 in 
c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from pandas>=0.23->seaborn) (2021.3) Requirement already satisfied: six>=1.5 in c:\users\kumanolab01\appdata\local\programs\python\python310\lib\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)
# Final chart drawn with matplotlib's 'dark_background' style. The whole
# figure is built inside the context manager so the style applies to it only.
with plt.style.context('dark_background'):
    my_ticks = np.arange(hist_ini, hist_fin, hist_step)  # left edge of each class
    plt.xticks(np.append(my_ticks, hist_fin))            # plus the final right edge
    plt.yticks(np.arange(0, 21, 5))
    plt.title("〇〇クラス男子の体重分布", fontsize=18)
    plt.xlabel("体重 [kg]", fontsize=14)
    plt.ylabel("生徒数", fontsize=14)
    # The width follows hist_step (instead of a literal 2) so bars and class
    # edges stay in sync.
    plt.bar(my_ticks, dosu, align='edge', color="c", ec="black",
            width=hist_step, alpha=0.8)
    # Sample-size annotation; (42.5, 20) is given in data coordinates
    plt.text(42.5, 20, f"n={dosu.sum()}", fontsize=16)
    # Second y axis sharing the x axis; the ylim/ylabel calls below act on it
    plt.twinx()
    plt.ylim(0, 110)
    plt.ylabel("累積度数", fontsize=14)
    # Cumulative counts drawn at each class midpoint (left edge + half a step)
    plt.plot(my_ticks + hist_step/2, cum_dosu, "mD-", lw=2)