Created
July 22, 2018 14:46
-
-
Save oshikiri/56067ccd21fc5ae1126f71ae74a49957 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"toc": true | |
}, | |
"source": [ | |
"<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", | |
"<div class=\"toc\" style=\"margin-top: 1em;\"><ul class=\"toc-item\"><li><span><a href=\"#GroupBy-オブジェクトに-pipe-メソッドが追加された(v0.23より)\" data-toc-modified-id=\"GroupBy-オブジェクトに-pipe-メソッドが追加された(v0.23より)-1\"><span class=\"toc-item-num\">1 </span>GroupBy オブジェクトに pipe メソッドが追加された(v0.23より)</a></span></li><li><span><a href=\"#各グループの要素数をカウントするときは、count-ではなく-size-を使う\" data-toc-modified-id=\"各グループの要素数をカウントするときは、count-ではなく-size-を使う-2\"><span class=\"toc-item-num\">2 </span>各グループの要素数をカウントするときは、<code>count</code> ではなく <code>size</code> を使う</a></span></li><li><span><a href=\"#ひとつの-assign-内部で前に(定義|更新)したカラムを参照できるようになった(v0.23より)\" data-toc-modified-id=\"ひとつの-assign-内部で前に(定義|更新)したカラムを参照できるようになった(v0.23より)-3\"><span class=\"toc-item-num\">3 </span>ひとつの <code>assign</code> 内部で前に(定義|更新)したカラムを参照できるようになった(v0.23より)</a></span></li><li><span><a href=\"#DataFrame.eval\" data-toc-modified-id=\"DataFrame.eval-4\"><span class=\"toc-item-num\">4 </span>DataFrame.eval</a></span></li></ul></div>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.449582Z", | |
"start_time": "2018-07-22T14:45:55.249102Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"n = 1000\n", | |
"df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n),\n", | |
" 'Product': np.random.choice(['Product_1', 'Product_2', 'Product_3'], n),\n", | |
" 'Revenue': (np.random.random(n)*50+10).round(2),\n", | |
" 'Quantity': np.random.randint(1, 10, size=n)})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# GroupBy オブジェクトに pipe メソッドが追加された(v0.23より)\n", | |
"\n", | |
"c.f. https://pandas.pydata.org/pandas-docs/version/0.23.0/whatsnew.html#groupby-objects-now-have-a-pipe-method\n", | |
"\n", | |
"これまでだと↓のような感じで書く必要があったのが、(もしかするともっと良い書き方があるかも…?)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.468174Z", | |
"start_time": "2018-07-22T14:45:55.450916Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Product</th>\n", | |
" <th>Product_1</th>\n", | |
" <th>Product_2</th>\n", | |
" <th>Product_3</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Store</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Store_1</th>\n", | |
" <td>7.53</td>\n", | |
" <td>7.07</td>\n", | |
" <td>6.93</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Store_2</th>\n", | |
" <td>7.00</td>\n", | |
" <td>7.52</td>\n", | |
" <td>6.83</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Product Product_1 Product_2 Product_3\n", | |
"Store \n", | |
"Store_1 7.53 7.07 6.93\n", | |
"Store_2 7.00 7.52 6.83" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" df\n", | |
" .groupby(['Store', 'Product']) \n", | |
" .sum()\n", | |
" .assign(rpq = lambda d: d.Revenue / d.Quantity)\n", | |
" .rpq\n", | |
" .unstack()\n", | |
" .round(2)\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"↓だけでよくなった" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.479267Z", | |
"start_time": "2018-07-22T14:45:55.469712Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>Product</th>\n", | |
" <th>Product_1</th>\n", | |
" <th>Product_2</th>\n", | |
" <th>Product_3</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Store</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Store_1</th>\n", | |
" <td>7.53</td>\n", | |
" <td>7.07</td>\n", | |
" <td>6.93</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Store_2</th>\n", | |
" <td>7.00</td>\n", | |
" <td>7.52</td>\n", | |
" <td>6.83</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Product Product_1 Product_2 Product_3\n", | |
"Store \n", | |
"Store_1 7.53 7.07 6.93\n", | |
"Store_2 7.00 7.52 6.83" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" df\n", | |
" .groupby(['Store', 'Product'])\n", | |
" .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum())\n", | |
" .unstack()\n", | |
" .round(2)\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 各グループの要素数をカウントするときは、`count` ではなく `size` を使う\n", | |
"\n", | |
"見逃しててずっと `count` で頑張っていたが、" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.485432Z", | |
"start_time": "2018-07-22T14:45:55.480535Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Store Product \n", | |
"Store_1 Product_1 163\n", | |
" Product_2 188\n", | |
" Product_3 153\n", | |
"Store_2 Product_1 163\n", | |
" Product_2 173\n", | |
" Product_3 160\n", | |
"Name: Product, dtype: int64" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.groupby(['Store', 'Product']).Product.count()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"↓ こう書けば良い" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.491025Z", | |
"start_time": "2018-07-22T14:45:55.486580Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Store Product \n", | |
"Store_1 Product_1 163\n", | |
" Product_2 188\n", | |
" Product_3 153\n", | |
"Store_2 Product_1 163\n", | |
" Product_2 173\n", | |
" Product_3 160\n", | |
"dtype: int64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.groupby(['Store', 'Product']).size()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# ひとつの `assign` 内部で前に(定義|更新)したカラムを参照できるようになった(v0.23より)\n", | |
"\n", | |
"c.f. https://pandas.pydata.org/pandas-docs/version/0.23.0/whatsnew.html#assign-accepts-dependent-arguments" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.501585Z", | |
"start_time": "2018-07-22T14:45:55.492439Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Store</th>\n", | |
" <th>Product</th>\n", | |
" <th>Revenue</th>\n", | |
" <th>Quantity</th>\n", | |
" <th>rpq</th>\n", | |
" <th>rpq_squared</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>24.76</td>\n", | |
" <td>1</td>\n", | |
" <td>24.760000</td>\n", | |
" <td>613.057600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Store_2</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>53.47</td>\n", | |
" <td>4</td>\n", | |
" <td>13.367500</td>\n", | |
" <td>178.690056</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>21.24</td>\n", | |
" <td>4</td>\n", | |
" <td>5.310000</td>\n", | |
" <td>28.196100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>51.53</td>\n", | |
" <td>9</td>\n", | |
" <td>5.725556</td>\n", | |
" <td>32.781986</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Store_2</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>50.65</td>\n", | |
" <td>8</td>\n", | |
" <td>6.331250</td>\n", | |
" <td>40.084727</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Store Product Revenue Quantity rpq rpq_squared\n", | |
"0 Store_1 Product_2 24.76 1 24.760000 613.057600\n", | |
"1 Store_2 Product_2 53.47 4 13.367500 178.690056\n", | |
"2 Store_1 Product_2 21.24 4 5.310000 28.196100\n", | |
"3 Store_1 Product_2 51.53 9 5.725556 32.781986\n", | |
"4 Store_2 Product_2 50.65 8 6.331250 40.084727" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" df\n", | |
" .assign(\n", | |
" rpq = lambda d: d.Revenue / d.Quantity,\n", | |
" rpq_squared = lambda d: d.rpq ** 2\n", | |
" )\n", | |
" .head()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# DataFrame.eval\n", | |
"\n", | |
"assign と同じことを inplace でやりたいなら `eval` で似たようなことができる\n", | |
"\n", | |
"c.f. https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.eval.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-07-22T14:45:55.514041Z", | |
"start_time": "2018-07-22T14:45:55.502672Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Store</th>\n", | |
" <th>Product</th>\n", | |
" <th>Revenue</th>\n", | |
" <th>Quantity</th>\n", | |
" <th>rpq</th>\n", | |
" <th>rpq_squared</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>24.76</td>\n", | |
" <td>1</td>\n", | |
" <td>24.760000</td>\n", | |
" <td>613.057600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Store_2</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>53.47</td>\n", | |
" <td>4</td>\n", | |
" <td>13.367500</td>\n", | |
" <td>178.690056</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>21.24</td>\n", | |
" <td>4</td>\n", | |
" <td>5.310000</td>\n", | |
" <td>28.196100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Store_1</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>51.53</td>\n", | |
" <td>9</td>\n", | |
" <td>5.725556</td>\n", | |
" <td>32.781986</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Store_2</td>\n", | |
" <td>Product_2</td>\n", | |
" <td>50.65</td>\n", | |
" <td>8</td>\n", | |
" <td>6.331250</td>\n", | |
" <td>40.084727</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Store Product Revenue Quantity rpq rpq_squared\n", | |
"0 Store_1 Product_2 24.76 1 24.760000 613.057600\n", | |
"1 Store_2 Product_2 53.47 4 13.367500 178.690056\n", | |
"2 Store_1 Product_2 21.24 4 5.310000 28.196100\n", | |
"3 Store_1 Product_2 51.53 9 5.725556 32.781986\n", | |
"4 Store_2 Product_2 50.65 8 6.331250 40.084727" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df2 = df\n", | |
"df2.eval('rpq = Revenue / Quantity', inplace = True)\n", | |
"df2.eval('rpq_squared = rpq * rpq', inplace = True)\n", | |
"df2.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.6" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"toc_cell": true, | |
"toc_position": {}, | |
"toc_section_display": "block", | |
"toc_window_display": true | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment