Python PANDAS
Pandasとは
データ分析(データサイエンス)ライブラリ
・データの統計量表示
・データのグラフ化
等
Series
一次元データ構造
Value + Index
DataFrameの構成要素となる
List→Series
import pandas as pd
list = ['CakePHP3', 'Django', 'Spring-Boot']
s = pd.Series(list)
0 | CakePHP3 |
1 | Django |
2 | Spring-Boot |
# Index未定義の場合は自動で定義される
二次元List→Series
list = [['CakePHP3', 'Laravel'],['Django', ''],['Spring-Boot','Struts']]
s = pd.Series(list)
0 | [CakePHP3, Laravel] |
1 | [Django, ] |
2 | [Spring-Boot, Struts] |
# Seriesで二次元を表現すると、値が配列になる
Indexを定義
list = ['CakePHP3', 'Django', 'Spring-Boot']
s = pd.Series(list, index=['php', 'Python', 'Java'])
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
連想配列→Series
dic = {'php':'CakePHP3', 'Python':'Django', 'Java':'Spring-Boot'}
s = pd.Series(dic)
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
# 連想配列のKeyが自動でIndexと見なされる
dic = {'php':'CakePHP3', 'Python':'Django', 'Java':'Spring-Boot'}
s = pd.Series(dic, index=['php', 'Python', 'Java', 'C#'])
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
C# | NaN |
# Index:C#のValueは指定されていないのでNaN(Not a Number)に
Valueの取得
print(s[0]) # CakePHP3
print(s['php']) # CakePHP3
Valueの追加
result = s.append(s)
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
Seriesは二次元にはならない
DataFrame
二次元データ構造
Seriesを要素に持つ
連想配列→DataFrame
Key=Column、Value=Value
dic = {'FW1':['CakePHP3', 'Django', 'Spring-Boot'], 'FW2':['Laravel', '', 'Struts']}
df = pd.DataFrame(dic, index=['php', 'Python', 'Java'])
FW1 | FW2 | |
php | CakePHP3 | Laravel |
Python | Django | |
Java | Spring-Boot | Struts |
Valueが配列でない連想配列はエラー
dic = {'php':'CakePHP3', 'Python':'Django', 'Java':'Spring-Boot'}
df = pd.DataFrame(dic)
print(df)# エラー
Valueが配列の連想配列からDataFrameを定義
Key=Column、Value=Value
dic = {'php':['CakePHP3'], 'Python':['Django'], 'Java':['Spring-Boot']}
df = pd.DataFrame(dic)
php | Python | Java | |
0 | CakePHP3 | Django | Spring-Boot |
リスト→DataFrame
list = ['CakePHP3', 'Django', 'Spring-Boot']
df = pd.DataFrame(list)
0 | CakePHP3 |
1 | Django |
2 | Spring-Boot |
Indexを定義
list = ['CakePHP3', 'Django', 'Spring-Boot']
df = pd.DataFrame(list, index=['php', 'Python', 'Java'])
0 | |
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
Columnを定義
list = ['CakePHP3', 'Django', 'Spring-Boot']
df = pd.DataFrame(list, columns=['FW'])
FW | |
0 | CakePHP3 |
1 | Django |
2 | Spring-Boot |
Index + Columnを定義
list = ['CakePHP3', 'Django', 'Spring-Boot']
df = pd.DataFrame(list, index=['php', 'Python', 'Java'], columns=['FW'])
FW | |
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
Series→DataFrame
s = pd.Series({'php':'CakePHP3', 'Python':'Django', 'Java':'Spring-Boot'})
df = pd.DataFrame(s)
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
Columnを定義
s = pd.Series({'php':'CakePHP3', 'Python':'Django', 'Java':'Spring-Boot'})
df = pd.DataFrame(s, columns=['FW'] )
FW | |
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
参考
Seriesの二次元配列
df = pd.DataFrame([s, s])
php | Python | Java | |
0 | CakePHP3 | Django | Spring-Boot |
1 | CakePHP3 | Django | Spring-Boot |
参考
連想配列のValueがSeriesの場合
# Series→DataFrame
idx = ['php', 'Python', 'Java']
FW = pd.Series(['CakePHP3', 'Django', 'Spring-Boot'])
exp = pd.Series([3, 1, 3])
dic = {'FW':FW, 'exp':exp}
df = pd.DataFrame(dic)
FW | exp | |
0 | CakePHP3 | 3 |
1 | Django | 1 |
2 | Spring-Boot | 3 |
列追加
df = pd.DataFrame(
    {'FW':['CakePHP3', 'Django', 'Spring-Boot']},
    index=['php', 'Python', 'Java'])
FW | |
php | CakePHP3 |
Python | Django |
Java | Spring-Boot |
df['exp'] = 0
FW | exp | |
php | CakePHP3 | 0 |
Python | Django | 0 |
Java | Spring-Boot | 0 |
df['exp'] = [3, 1, 3]
FW | exp | |
php | CakePHP3 | 3 |
Python | Django | 1 |
Java | Spring-Boot | 3 |
要素数不一致エラー
#df['exp'] = [3, 1, 3, 5]
関数
new = df.assign(exp=[3, 1, 3], per=[5, 3, 5])
FW | exp | per | |
php | CakePHP3 | 3 | 5 |
Python | Django | 1 | 3 |
Java | Spring-Boot | 3 | 5 |
inplace不可、同名の列を指定した場合は上書き
Seriesで列追加
df = pd.DataFrame({'FW':['CakePHP3', 'Django', 'Spring']})
df.index = ['php', 'Python', 'Java']
FW | |
php | CakePHP3 |
Python | Django |
Java | Spring |
s = pd.Series([3, 1, 3], index=['php', 'Python', 'Java'])
php | 3 |
Python | 1 |
Java | 3 |
df['exp'] = s
FW | exp | |
php | CakePHP3 | 3 |
Python | Django | 1 |
Java | Spring | 3 |
行列転換
new = df.T
php | Python | Java | |
FW | CakePHP3 | Django | Spring |
exp | 3 | 1 | 3 |
Index
Indexオブジェクト
DBにおける各レコードに名称を付ける様な概念
a = pd.DataFrame(
    [[1,1,1,],[2,1,2],[3,2,3]],
    #Indexオブジェクト:ラベル
    index=["one","two","three"],
    #Indexオブジェクト:カラム名
    columns=["a","b","c"]
)
a | b | c | |
one | 1 | 1 | 1 |
two | 2 | 1 | 2 |
three | 3 | 2 | 3 |
※ラベル、カラム名は重複可能
print(a.index)
# Index(['one', 'two', 'three'], dtype='object')
print(a.index[1])
# two
# 0~2まで
print(a.index[0:3])
Index(['one', 'two', 'three'], dtype='object')
後からIndexのみ定義
a = pd.Series(['CakePHP3', 'Django', 'Spring'])
a.index = ['php', 'Python', 'Java']
Indexオブジェクトの変更エラー
idx = ['php', 'Python', 'Java']
list = ['CakePHP3', 'Django', 'Spring']
col = ['FW']
s = pd.Series(list)
0 | CakePHP3 |
1 | Django |
2 | Spring |
df = pd.DataFrame(s, columns=['FW'], index=idx)
FW | |
php | NaN |
Python | NaN |
Java | NaN |
→Seriesを定義した時点でIndexも定義されている為、上書きするとValueがNaNに
Indexオブジェクトの変更方法
print(a.columns)
# Index(['a', 'b', 'c'], dtype='object')
a.index[1] = 'new' # エラー
Index名を変更した新しいDataFrameを取得
b = a.rename(index={'two':'new'})
print(b.index)
# Index(['one', 'new', 'three'], dtype='object')
Indexオブジェクトの置き換え
a.rename(index={'two':'new'}, inplace=True)
print(a.index)
# Index(['one', 'new', 'three'], dtype='object')
Indexを列で設定
df
FW | exp | |
php | CakePHP3 | 3 |
Python | Django | 1 |
Java | Spring | 3 |
new = df.set_index('FW')
exp | |
FW | |
CakePHP3 | 3 |
Django | 1 |
Spring | 3 |
データ情報
行数、列数
print(df.shape)
(行数, 列数)
データ型
print(df.dtypes)
列1 object
列2 int64
列3 float64
要約統計量
result = df['列名'].describe()
count | 要素の個数 |
mean | 算術平均 |
std | 標準偏差 |
min | 最小値 |
max | 最大値 |
25% | 1/4分位数 |
50% | 中央値(median) |
75% | 3/4分位数 |
カウント
指定列の値毎のカウント
df['temperature'].value_counts()
7.8 | 4 |
13.8 | 3 |
28.8 | 3 |
25.2 | 3 |
26.0 | 3 |
グループ化
name列でグループ化
df.groupby(['name'])
name列でグループ化した上でweek列のカウント数
df.groupby(['name'])['week'].count()
味噌焼き | 3 |
唐揚 | 1 |
唐揚げ | 1 |
唐揚げおろし | 4 |
ソート
df.sort_values(by='kcal', ascending=True)
NaNを先頭に(デフォルト='last')
df.sort_values(by='kcal', ascending=False, na_position='first')
置換
df.sort_values(by='kcal', ascending=False, inplace=True)
複数キーのソート
df.sort_values(by=['soldout','datetime'], ascending=[False, True])
Index名でソート (デフォルト=True)
df.sort_index(ascending=False)
データ取得
指定行、列表示
先頭5行表示
df.head()
先頭10行表示
df.head(10)
末尾5行表示
df.tail()
末尾10行表示
df.tail(10)
指定列のみ
df[['列1','列5']].head(10)
列番号で指定
df[5:10].head(10)
指定行(全列)
※0から
df.loc[100]
指定行、指定列
df.iloc[[1,2,5],[2,4]]
df.iloc[[行1,行2,行5],[列2,列4]]
条件を指定
df[df['列名'] > 450]
複数条件を指定
df[['列名1', '列名2']].query('列名1 > ~ and 列名2 == "~"')
重複を削除して表示
df['列名'].unique()
array(['値1', '値2', '値3'])
重複削除
result = df['列名'].drop_duplicates()
その他
JupyterNotebookカレントディレクトリ
import os
os.getcwd()
'C:\\Users\\~\\Desktop'
CSVファイルの読込
JupyterNotebookでuploadしたCSVファイルの読込
import pandas as pd
df= pd.read_csv('~.csv')
df= pd.read_csv('~.csv', sep=',')