Created
October 26, 2019 01:14
-
-
Save jshirius/e56ee1cf3ae6a73b38086ea3b0234644 to your computer and use it in GitHub Desktop.
pandasのサンプル!データの抽出(iloc,loc)、データの更新、データの追加、時間比較(データの抽出)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2017-10-01</td>\n", | |
" <td>600</td>\n", | |
" <td>cash</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>1000</td>\n", | |
" <td>card1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>2000</td>\n", | |
" <td>card1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>2017-10-03</td>\n", | |
" <td>2400</td>\n", | |
" <td>card2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>2017-10-04</td>\n", | |
" <td>900</td>\n", | |
" <td>card3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>2017-10-05</td>\n", | |
" <td>2200</td>\n", | |
" <td>card3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date 数値 コメント\n", | |
"0 1 2017-10-01 600 cash\n", | |
"1 2 2017-10-02 1000 card1\n", | |
"2 3 2017-10-02 2000 card1\n", | |
"3 4 2017-10-03 2400 card2\n", | |
"4 5 2017-10-04 900 card3\n", | |
"5 6 2017-10-05 2200 card3" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"id int64\n", | |
"date object\n", | |
"数値 int64\n", | |
"コメント object\n", | |
"dtype: object" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import datetime as dt\n", | |
"\n", | |
"#csvデータを読み込む\n", | |
"df = pd.read_csv('pandas_test' + '.csv', header=0) \n", | |
"display(df)\n", | |
"\n", | |
"#データ・タイプの確認\n", | |
"display(df.dtypes)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"id 2\n", | |
"date 2017-10-02\n", | |
"数値 1000\n", | |
"コメント card1\n", | |
"Name: 1, dtype: object" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"1000" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#########################\n", | |
"#データ取り出す操作 ilocにて\n", | |
"#ilocは数値を選択して取り出すもの。先頭のiはint型の意味か\n", | |
"#########################\n", | |
"\n", | |
"#2行目のデータ取り出す\n", | |
"a = df.iloc[1]\n", | |
"display(a)\n", | |
"\n", | |
"#2行目3列目のデータ取得\n", | |
"a = df.iloc[1,2]\n", | |
"display(a)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 2017-10-01\n", | |
"1 2017-10-02\n", | |
"2 2017-10-02\n", | |
"3 2017-10-03\n", | |
"4 2017-10-04\n", | |
"5 2017-10-05\n", | |
"Name: date, dtype: object" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"'card1'" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#########################\n", | |
"#データ取り出す操作 locにて\n", | |
"#iocは名前を選択して取り出すもの。\n", | |
"#ただし、行については数値で指定しても大丈夫だ\n", | |
"#########################\n", | |
"\n", | |
"#dateの列だけ取得する\n", | |
"a = df.loc[:,\"date\"]\n", | |
"display(a)\n", | |
"\n", | |
"#2行目の「コメント」列の情報を取りたい場合の指定方法\n", | |
"a = df.loc[1,\"コメント\"]\n", | |
"display(a)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2017-10-01</td>\n", | |
" <td>600</td>\n", | |
" <td>cash</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>1000</td>\n", | |
" <td>コメントの更新してみる</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>2000</td>\n", | |
" <td>card1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>2017-10-03</td>\n", | |
" <td>2400</td>\n", | |
" <td>card2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>2017-10-04</td>\n", | |
" <td>900</td>\n", | |
" <td>card3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>2017-10-05</td>\n", | |
" <td>2200</td>\n", | |
" <td>card3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date 数値 コメント\n", | |
"0 1 2017-10-01 600 cash\n", | |
"1 2 2017-10-02 1000 コメントの更新してみる\n", | |
"2 3 2017-10-02 2000 card1\n", | |
"3 4 2017-10-03 2400 card2\n", | |
"4 5 2017-10-04 900 card3\n", | |
"5 6 2017-10-05 2200 card3" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#########################\n", | |
"#データの更新\n", | |
"#########################\n", | |
"\n", | |
"#2行目4列目(コメント)のデータを更新する\n", | |
"df.loc[1,\"コメント\"] = \"コメントの更新してみる\"\n", | |
"display(df)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" <th>コメント2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2017-10-01</td>\n", | |
" <td>600</td>\n", | |
" <td>cash</td>\n", | |
" <td>コメントカラム追加</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>1000</td>\n", | |
" <td>コメントの更新してみる</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>2000</td>\n", | |
" <td>card1</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>2017-10-03</td>\n", | |
" <td>2400</td>\n", | |
" <td>card2</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>2017-10-04</td>\n", | |
" <td>900</td>\n", | |
" <td>card3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>2017-10-05</td>\n", | |
" <td>2200</td>\n", | |
" <td>card3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date 数値 コメント コメント2\n", | |
"0 1 2017-10-01 600 cash コメントカラム追加\n", | |
"1 2 2017-10-02 1000 コメントの更新してみる NaN\n", | |
"2 3 2017-10-02 2000 card1 NaN\n", | |
"3 4 2017-10-03 2400 card2 NaN\n", | |
"4 5 2017-10-04 900 card3 NaN\n", | |
"5 6 2017-10-05 2200 card3 NaN" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" <th>コメント2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>2017-10-01</td>\n", | |
" <td>600</td>\n", | |
" <td>cash</td>\n", | |
" <td>コメントカラム追加</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>1000</td>\n", | |
" <td>コメントの更新してみる</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>2017-10-02</td>\n", | |
" <td>2000</td>\n", | |
" <td>card1</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>2017-10-03</td>\n", | |
" <td>2400</td>\n", | |
" <td>card2</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>2017-10-04</td>\n", | |
" <td>900</td>\n", | |
" <td>card3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>2017-10-05</td>\n", | |
" <td>2200</td>\n", | |
" <td>card3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>2017-10-06</td>\n", | |
" <td>3000</td>\n", | |
" <td>card4</td>\n", | |
" <td>なにか</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date 数値 コメント コメント2\n", | |
"0 1 2017-10-01 600 cash コメントカラム追加\n", | |
"1 2 2017-10-02 1000 コメントの更新してみる NaN\n", | |
"2 3 2017-10-02 2000 card1 NaN\n", | |
"3 4 2017-10-03 2400 card2 NaN\n", | |
"4 5 2017-10-04 900 card3 NaN\n", | |
"5 6 2017-10-05 2200 card3 NaN\n", | |
"6 7 2017-10-06 3000 card4 なにか" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#########################\n", | |
"#データの追加\n", | |
"#########################\n", | |
"\n", | |
"#とりあえずカラムを追加する\n", | |
"#この動きとしては、「コメント2」カラムが追加されて、1行目にデータが代入される。データが無いところはNullになる\n", | |
"df.loc[0,\"コメント2\"] = \"コメントカラム追加\"\n", | |
"display(df)\n", | |
"\n", | |
"#行を追加してみるよ。\n", | |
"#行を追加するためSeriesを作る。\n", | |
"#変数 = pd.Series(データの配列, index = 横行の名前の配列)\n", | |
"tmp_se = pd.Series( [ 7 , \"2017-10-06\", 3000, \"card4\", \"なにか\" ], index=df.columns )\n", | |
"#df.append( tmp_se ) NG\n", | |
"#「ignore_index=True」がないと「TypeError: Can only append a Series if ignore_index=True or if the Series has a name」というエラーが出る\n", | |
"#原因はindexの指定が無いから\n", | |
"df = df.append( tmp_se ,ignore_index=True)\n", | |
"display(df)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" <th>コメント2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [id, date, 数値, コメント, コメント2]\n", | |
"Index: []" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"id int64\n", | |
"date datetime64[ns]\n", | |
"数値 int64\n", | |
"コメント object\n", | |
"コメント2 object\n", | |
"dtype: object\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>date</th>\n", | |
" <th>数値</th>\n", | |
" <th>コメント</th>\n", | |
" <th>コメント2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>2017-10-05</td>\n", | |
" <td>2200</td>\n", | |
" <td>card3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id date 数値 コメント コメント2\n", | |
"5 6 2017-10-05 2200 card3 NaN" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"#########################\n", | |
"#時間の比較\n", | |
"#データの抽出\n", | |
"#########################\n", | |
"\n", | |
"#まず失敗するケース\n", | |
"#「2017-10-05」同じ日付の行だけ抽出したケース\n", | |
"#結果は、1件もデータ取得できない\n", | |
"a = df[ df[\"date\"] == dt.datetime(2017, 10, 5)]\n", | |
"display(a)\n", | |
"\n", | |
"#原因はdateが日付型(datetime64)になっていないため\n", | |
"#この結果、カラム「date」は、datetime64[ns]型の変数に変化する\n", | |
"df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')\n", | |
"print(df.dtypes)\n", | |
"\n", | |
"#「date」カラムをdatetime64にした状態で2017年10月5日の行を取得してみるよ\n", | |
"#その結果、正しく2017年10月5日の行のデータを取得できたことを確認できた。\n", | |
"a = df[ df[\"date\"] == dt.datetime(2017, 10, 5)]\n", | |
"display(a)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment