Created
July 3, 2020 10:41
-
-
Save keimina/c77eb667bb55980b022241381493b26a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th>data1</th>\n", | |
" <th>data2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>a</td>\n", | |
" <td>one</td>\n", | |
" <td>0.418184</td>\n", | |
" <td>-0.783387</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>two</td>\n", | |
" <td>2.365452</td>\n", | |
" <td>-0.078454</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>b</td>\n", | |
" <td>one</td>\n", | |
" <td>0.389734</td>\n", | |
" <td>1.644066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>b</td>\n", | |
" <td>two</td>\n", | |
" <td>-0.929937</td>\n", | |
" <td>-0.020328</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>a</td>\n", | |
" <td>one</td>\n", | |
" <td>2.849931</td>\n", | |
" <td>1.530615</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" key1 key2 data1 data2\n", | |
"0 a one 0.418184 -0.783387\n", | |
"1 a two 2.365452 -0.078454\n", | |
"2 b one 0.389734 1.644066\n", | |
"3 b two -0.929937 -0.020328\n", | |
"4 a one 2.849931 1.530615" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"# Seed the legacy global RNG so that Restart & Run All reproduces the same random data.\n", | |
"np.random.seed(0)\n", | |
"\n", | |
"# Two categorical key columns plus two columns of standard-normal noise.\n", | |
"df = pd.DataFrame({\n", | |
"    'key1': ['a', 'a', 'b', 'b', 'a'],\n", | |
"    'key2': ['one', 'two', 'one', 'two', 'one'],\n", | |
"    'data1': np.random.randn(5),\n", | |
"    'data2': np.random.randn(5),\n", | |
"})\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fc188187cd0>" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['data1'].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"key1\n", | |
"a 3\n", | |
"b 2\n", | |
"Name: data1, dtype: int64" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['data1'].groupby(df['key1']).size()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('a', 0 0.418184\n", | |
" 1 2.365452\n", | |
" 4 2.849931\n", | |
" Name: data1, dtype: float64), ('b', 2 0.389734\n", | |
" 3 -0.929937\n", | |
" Name: data1, dtype: float64)]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(df['data1'].groupby(df['key1']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g1 = df['data1'].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
"0 0.418184\n", | |
"1 2.365452\n", | |
"4 2.849931\n", | |
"Name: data1, dtype: float64\n", | |
"b\n", | |
"2 0.389734\n", | |
"3 -0.929937\n", | |
"Name: data1, dtype: float64\n" | |
] | |
} | |
], | |
"source": [ | |
"# Iterating a SeriesGroupBy yields (group key, sub-Series) pairs.\n", | |
"for key, group in g1:\n", | |
"    print(key)\n", | |
"    print(group)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
"b\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[('a', 0 0.418184\n", | |
" 1 2.365452\n", | |
" 4 2.849931\n", | |
" Name: data1, dtype: float64), ('b', 2 0.389734\n", | |
" 3 -0.929937\n", | |
" Name: data1, dtype: float64)]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Print only the group keys while iterating over g1.\n", | |
"for k, df2 in g1:\n", | |
" print(k)\n", | |
" \n", | |
"list(g1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('a', 0 0.418184\n", | |
" 1 2.365452\n", | |
" 4 2.849931\n", | |
" Name: data1, dtype: float64)" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(g1)[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 0.418184\n", | |
"1 2.365452\n", | |
"2 0.389734\n", | |
"3 -0.929937\n", | |
"4 2.849931\n", | |
"Name: data1, dtype: float64" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['data1']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"pandas.core.series.Series" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(df['data1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g2 = df[['data2']].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
"b\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print only the group keys while iterating over g2.\n", | |
"for k, df2 in g2:\n", | |
" print(k)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
" data2\n", | |
"0 -0.783387\n", | |
"1 -0.078454\n", | |
"4 1.530615\n", | |
"b\n", | |
" data2\n", | |
"2 1.644066\n", | |
"3 -0.020328\n" | |
] | |
} | |
], | |
"source": [ | |
"# Iterating a DataFrameGroupBy yields (group key, sub-DataFrame) pairs.\n", | |
"for k, df2 in g2:\n", | |
" print(k)\n", | |
" print(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Iterate over g2 without unpacking; each item is a (group key, sub-DataFrame) tuple.\n", | |
"for i in g2:\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Iterate over g2 without unpacking; each item is a (group key, sub-DataFrame) tuple.\n", | |
"for i in g2:\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc1c82e6910>" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"g2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
"0 0.418184\n", | |
"1 2.365452\n", | |
"4 2.849931\n", | |
"Name: data1, dtype: float64\n", | |
"b\n", | |
"2 0.389734\n", | |
"3 -0.929937\n", | |
"Name: data1, dtype: float64\n" | |
] | |
} | |
], | |
"source": [ | |
"for k, df2 in g1:\n", | |
" print(k)\n", | |
" print(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th>data1</th>\n", | |
" <th>data2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>a</td>\n", | |
" <td>one</td>\n", | |
" <td>0.418184</td>\n", | |
" <td>-0.783387</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>a</td>\n", | |
" <td>two</td>\n", | |
" <td>2.365452</td>\n", | |
" <td>-0.078454</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>b</td>\n", | |
" <td>one</td>\n", | |
" <td>0.389734</td>\n", | |
" <td>1.644066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>b</td>\n", | |
" <td>two</td>\n", | |
" <td>-0.929937</td>\n", | |
" <td>-0.020328</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>a</td>\n", | |
" <td>one</td>\n", | |
" <td>2.849931</td>\n", | |
" <td>1.530615</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" key1 key2 data1 data2\n", | |
"0 a one 0.418184 -0.783387\n", | |
"1 a two 2.365452 -0.078454\n", | |
"2 b one 0.389734 1.644066\n", | |
"3 b two -0.929937 -0.020328\n", | |
"4 a one 2.849931 1.530615" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g2 = df[['data2']].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
" data2\n", | |
"0 -0.783387\n", | |
"1 -0.078454\n", | |
"4 1.530615\n", | |
"b\n", | |
" data2\n", | |
"2 1.644066\n", | |
"3 -0.020328\n" | |
] | |
} | |
], | |
"source": [ | |
"for k, df2 in g2:\n", | |
" print(k)\n", | |
" print(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g1 = df['data1'].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
"0 0.418184\n", | |
"1 2.365452\n", | |
"4 2.849931\n", | |
"Name: data1, dtype: float64\n", | |
"b\n", | |
"2 0.389734\n", | |
"3 -0.929937\n", | |
"Name: data1, dtype: float64\n" | |
] | |
} | |
], | |
"source": [ | |
"for k, df2 in g1:\n", | |
" print(k)\n", | |
" print(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g2 = df[['data2']].groupby(df['key1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"a\n", | |
" data2\n", | |
"0 -0.783387\n", | |
"1 -0.078454\n", | |
"4 1.530615\n", | |
"b\n", | |
" data2\n", | |
"2 1.644066\n", | |
"3 -0.020328\n" | |
] | |
} | |
], | |
"source": [ | |
"for k, df2 in g2:\n", | |
" print(k)\n", | |
" print(df2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 0.418184\n", | |
"1 2.365452\n", | |
"2 0.389734\n", | |
"3 -0.929937\n", | |
"4 2.849931\n", | |
"Name: data1, dtype: float64" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['data1']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"pandas.core.series.Series" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(df['data1'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"pandas.core.frame.DataFrame" | |
] | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"type(df[['data2']])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>data2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>-0.783387</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>-0.078454</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>1.644066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>-0.020328</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>1.530615</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" data2\n", | |
"0 -0.783387\n", | |
"1 -0.078454\n", | |
"2 1.644066\n", | |
"3 -0.020328\n", | |
"4 1.530615" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df[['data2']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"g1.count?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>ア</td>\n", | |
" <td>0.542646</td>\n", | |
" <td>0.319061</td>\n", | |
" <td>-1.785315</td>\n", | |
" <td>1.968788</td>\n", | |
" <td>0.037417</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>イ</td>\n", | |
" <td>1.409337</td>\n", | |
" <td>0.481707</td>\n", | |
" <td>1.358458</td>\n", | |
" <td>-1.873930</td>\n", | |
" <td>-1.868467</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>ウ</td>\n", | |
" <td>0.772065</td>\n", | |
" <td>-0.758248</td>\n", | |
" <td>-0.029830</td>\n", | |
" <td>-0.545882</td>\n", | |
" <td>-0.402525</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>エ</td>\n", | |
" <td>-0.644325</td>\n", | |
" <td>0.369377</td>\n", | |
" <td>1.598141</td>\n", | |
" <td>1.643240</td>\n", | |
" <td>1.928777</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>オ</td>\n", | |
" <td>0.909540</td>\n", | |
" <td>1.360162</td>\n", | |
" <td>0.242817</td>\n", | |
" <td>1.691678</td>\n", | |
" <td>0.880182</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a b c d e\n", | |
"ア 0.542646 0.319061 -1.785315 1.968788 0.037417\n", | |
"イ 1.409337 0.481707 1.358458 -1.873930 -1.868467\n", | |
"ウ 0.772065 -0.758248 -0.029830 -0.545882 -0.402525\n", | |
"エ -0.644325 0.369377 1.598141 1.643240 1.928777\n", | |
"オ 0.909540 1.360162 0.242817 1.691678 0.880182" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame(np.random.randn(5,5), columns=list(\"abcde\"), index=list(\"アイウエオ\"))\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df.groupby(lambda x: int(x.mean())).sum()\n", | |
"# df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"ア\n", | |
"イ\n", | |
"ウ\n", | |
"エ\n", | |
"オ\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"Empty DataFrame\n", | |
"Columns: [a, b, c, d, e]\n", | |
"Index: []" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.groupby(lambda x: print(x)).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df.groupby(lambda x: int(df[x].mean())).sum()\n", | |
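"# Error (KeyError): groupby passes each row label (e.g. 'ア') to the function, and df[x] looks up a column; use df.loc[x] for the row." | |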
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df['ア']\n", | |
"# Error (KeyError): 'ア' is a row label, not a column name; use df.loc['ア'] to select that row." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>2.079723</td>\n", | |
" <td>0.411896</td>\n", | |
" <td>1.141453</td>\n", | |
" <td>1.192216</td>\n", | |
" <td>-0.304799</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>0.909540</td>\n", | |
" <td>1.360162</td>\n", | |
" <td>0.242817</td>\n", | |
" <td>1.691678</td>\n", | |
" <td>0.880182</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a b c d e\n", | |
"0 2.079723 0.411896 1.141453 1.192216 -0.304799\n", | |
"1 0.909540 1.360162 0.242817 1.691678 0.880182" | |
] | |
}, | |
"execution_count": 35, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Group rows by their row mean truncated toward zero, computed once for the whole frame\n", | |
"# instead of one df.loc lookup per index label.\n", | |
"df.groupby(df.mean(axis=1).astype(int)).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df = pd.DataFrame(np.random.randn(100,5), columns=list(\"abcde\"), index=list(\"アイウエオ\"))\n", | |
"# Error (ValueError): the index supplies only 5 labels for 100 rows of data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(np.random.randn(100,5), columns=list(\"abcde\"), index=range(100))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>0.056271</td>\n", | |
" <td>-0.268300</td>\n", | |
" <td>0.101382</td>\n", | |
" <td>-1.440264</td>\n", | |
" <td>0.258070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>0.932806</td>\n", | |
" <td>-0.320192</td>\n", | |
" <td>0.951708</td>\n", | |
" <td>-0.545906</td>\n", | |
" <td>1.896548</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>-1.098495</td>\n", | |
" <td>-0.438470</td>\n", | |
" <td>0.125594</td>\n", | |
" <td>0.166621</td>\n", | |
" <td>-1.084580</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>-0.349849</td>\n", | |
" <td>0.245474</td>\n", | |
" <td>0.378100</td>\n", | |
" <td>2.195348</td>\n", | |
" <td>-3.408793</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>-1.914535</td>\n", | |
" <td>-0.134004</td>\n", | |
" <td>0.438810</td>\n", | |
" <td>-0.622418</td>\n", | |
" <td>-1.514950</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>95</td>\n", | |
" <td>-0.021858</td>\n", | |
" <td>1.723504</td>\n", | |
" <td>0.723022</td>\n", | |
" <td>1.274680</td>\n", | |
" <td>-0.045642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>96</td>\n", | |
" <td>0.063260</td>\n", | |
" <td>-0.453309</td>\n", | |
" <td>-0.124663</td>\n", | |
" <td>0.482089</td>\n", | |
" <td>-0.286932</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>97</td>\n", | |
" <td>0.077643</td>\n", | |
" <td>-0.562259</td>\n", | |
" <td>-0.941159</td>\n", | |
" <td>-0.527117</td>\n", | |
" <td>-0.037437</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>98</td>\n", | |
" <td>0.821687</td>\n", | |
" <td>-0.068570</td>\n", | |
" <td>1.069523</td>\n", | |
" <td>-1.541264</td>\n", | |
" <td>0.340885</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>99</td>\n", | |
" <td>0.210404</td>\n", | |
" <td>0.056076</td>\n", | |
" <td>1.138890</td>\n", | |
" <td>-1.748428</td>\n", | |
" <td>0.760276</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>100 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a b c d e\n", | |
"0 0.056271 -0.268300 0.101382 -1.440264 0.258070\n", | |
"1 0.932806 -0.320192 0.951708 -0.545906 1.896548\n", | |
"2 -1.098495 -0.438470 0.125594 0.166621 -1.084580\n", | |
"3 -0.349849 0.245474 0.378100 2.195348 -3.408793\n", | |
"4 -1.914535 -0.134004 0.438810 -0.622418 -1.514950\n", | |
".. ... ... ... ... ...\n", | |
"95 -0.021858 1.723504 0.723022 1.274680 -0.045642\n", | |
"96 0.063260 -0.453309 -0.124663 0.482089 -0.286932\n", | |
"97 0.077643 -0.562259 -0.941159 -0.527117 -0.037437\n", | |
"98 0.821687 -0.068570 1.069523 -1.541264 0.340885\n", | |
"99 0.210404 0.056076 1.138890 -1.748428 0.760276\n", | |
"\n", | |
"[100 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>-1</td>\n", | |
" <td>-5.316737</td>\n", | |
" <td>-4.398922</td>\n", | |
" <td>0.389717</td>\n", | |
" <td>-4.520742</td>\n", | |
" <td>-2.640314</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>1.905300</td>\n", | |
" <td>12.460386</td>\n", | |
" <td>-12.652848</td>\n", | |
" <td>-6.621081</td>\n", | |
" <td>-0.446914</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>2.065343</td>\n", | |
" <td>0.643820</td>\n", | |
" <td>1.385855</td>\n", | |
" <td>1.598322</td>\n", | |
" <td>-0.453732</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" a b c d e\n", | |
"-1 -5.316737 -4.398922 0.389717 -4.520742 -2.640314\n", | |
" 0 1.905300 12.460386 -12.652848 -6.621081 -0.446914\n", | |
" 1 2.065343 0.643820 1.385855 1.598322 -0.453732" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Group rows by their row mean truncated toward zero, computed once for the whole frame\n", | |
"# instead of one df.loc lookup per index label.\n", | |
"df.groupby(df.mean(axis=1).astype(int)).sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment