Skip to content

Instantly share code, notes, and snippets.

@keimina
Created July 3, 2020 10:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keimina/c77eb667bb55980b022241381493b26a to your computer and use it in GitHub Desktop.
Save keimina/c77eb667bb55980b022241381493b26a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>data1</th>\n",
" <th>data2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>a</td>\n",
" <td>one</td>\n",
" <td>0.418184</td>\n",
" <td>-0.783387</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>a</td>\n",
" <td>two</td>\n",
" <td>2.365452</td>\n",
" <td>-0.078454</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>b</td>\n",
" <td>one</td>\n",
" <td>0.389734</td>\n",
" <td>1.644066</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>b</td>\n",
" <td>two</td>\n",
" <td>-0.929937</td>\n",
" <td>-0.020328</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>a</td>\n",
" <td>one</td>\n",
" <td>2.849931</td>\n",
" <td>1.530615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 data1 data2\n",
"0 a one 0.418184 -0.783387\n",
"1 a two 2.365452 -0.078454\n",
"2 b one 0.389734 1.644066\n",
"3 b two -0.929937 -0.020328\n",
"4 a one 2.849931 1.530615"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"df= pd.DataFrame({'key1' : ['a', 'a', 'b', 'b', 'a'],\n",
" 'key2' : ['one', 'two', 'one', 'two', 'one'],\n",
" 'data1' : np.random.randn(5), 'data2' : np.random.randn(5)})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.SeriesGroupBy object at 0x7fc188187cd0>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['data1'].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"key1\n",
"a 3\n",
"b 2\n",
"Name: data1, dtype: int64"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['data1'].groupby(df['key1']).size()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('a', 0 0.418184\n",
" 1 2.365452\n",
" 4 2.849931\n",
" Name: data1, dtype: float64), ('b', 2 0.389734\n",
" 3 -0.929937\n",
" Name: data1, dtype: float64)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(df['data1'].groupby(df['key1']))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"g1 = df['data1'].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"0 0.418184\n",
"1 2.365452\n",
"4 2.849931\n",
"Name: data1, dtype: float64\n",
"b\n",
"2 0.389734\n",
"3 -0.929937\n",
"Name: data1, dtype: float64\n"
]
}
],
"source": [
"# Iterate over g1: each item is a (group key, sub-Series) pair\n",
"for k, df2 in g1:\n",
" print(k)\n",
" print(df2)\n",
" \n",
"g1 = df['data1'].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"b\n"
]
},
{
"data": {
"text/plain": [
"[('a', 0 0.418184\n",
" 1 2.365452\n",
" 4 2.849931\n",
" Name: data1, dtype: float64), ('b', 2 0.389734\n",
" 3 -0.929937\n",
" Name: data1, dtype: float64)]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Iterate over g1 again (a GroupBy can be iterated repeatedly): each item is a (group key, sub-Series) pair\n",
"for k, df2 in g1:\n",
" print(k)\n",
" \n",
"list(g1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('a', 0 0.418184\n",
" 1 2.365452\n",
" 4 2.849931\n",
" Name: data1, dtype: float64)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(g1)[0]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.418184\n",
"1 2.365452\n",
"2 0.389734\n",
"3 -0.929937\n",
"4 2.849931\n",
"Name: data1, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['data1']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df['data1'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"g2 = df[['data2']].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"b\n"
]
}
],
"source": [
"# Iterate over g2: each item is a (group key, sub-DataFrame) pair\n",
"for k, df2 in g2:\n",
" print(k)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
" data2\n",
"0 -0.783387\n",
"1 -0.078454\n",
"4 1.530615\n",
"b\n",
" data2\n",
"2 1.644066\n",
"3 -0.020328\n"
]
}
],
"source": [
"# Iterate over g2: each item is a (group key, sub-DataFrame) pair\n",
"for k, df2 in g2:\n",
" print(k)\n",
" print(df2)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Iterate over g2 without unpacking: each i is a (group key, sub-DataFrame) tuple\n",
"for i in g2:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Iterate over g2 without unpacking: each i is a (group key, sub-DataFrame) tuple\n",
"for i in g2:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fc1c82e6910>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g2"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"0 0.418184\n",
"1 2.365452\n",
"4 2.849931\n",
"Name: data1, dtype: float64\n",
"b\n",
"2 0.389734\n",
"3 -0.929937\n",
"Name: data1, dtype: float64\n"
]
}
],
"source": [
"for k, df2 in g1:\n",
" print(k)\n",
" print(df2)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>data1</th>\n",
" <th>data2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>a</td>\n",
" <td>one</td>\n",
" <td>0.418184</td>\n",
" <td>-0.783387</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>a</td>\n",
" <td>two</td>\n",
" <td>2.365452</td>\n",
" <td>-0.078454</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>b</td>\n",
" <td>one</td>\n",
" <td>0.389734</td>\n",
" <td>1.644066</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>b</td>\n",
" <td>two</td>\n",
" <td>-0.929937</td>\n",
" <td>-0.020328</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>a</td>\n",
" <td>one</td>\n",
" <td>2.849931</td>\n",
" <td>1.530615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 data1 data2\n",
"0 a one 0.418184 -0.783387\n",
"1 a two 2.365452 -0.078454\n",
"2 b one 0.389734 1.644066\n",
"3 b two -0.929937 -0.020328\n",
"4 a one 2.849931 1.530615"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"g2 = df[['data2']].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
" data2\n",
"0 -0.783387\n",
"1 -0.078454\n",
"4 1.530615\n",
"b\n",
" data2\n",
"2 1.644066\n",
"3 -0.020328\n"
]
}
],
"source": [
"for k, df2 in g2:\n",
" print(k)\n",
" print(df2)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"g1 = df['data1'].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"0 0.418184\n",
"1 2.365452\n",
"4 2.849931\n",
"Name: data1, dtype: float64\n",
"b\n",
"2 0.389734\n",
"3 -0.929937\n",
"Name: data1, dtype: float64\n"
]
}
],
"source": [
"for k, df2 in g1:\n",
" print(k)\n",
" print(df2)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"g2 = df[['data2']].groupby(df['key1'])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
" data2\n",
"0 -0.783387\n",
"1 -0.078454\n",
"4 1.530615\n",
"b\n",
" data2\n",
"2 1.644066\n",
"3 -0.020328\n"
]
}
],
"source": [
"for k, df2 in g2:\n",
" print(k)\n",
" print(df2)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.418184\n",
"1 2.365452\n",
"2 0.389734\n",
"3 -0.929937\n",
"4 2.849931\n",
"Name: data1, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['data1']"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.series.Series"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df['data1'])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pandas.core.frame.DataFrame"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(df[['data2']])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>data2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>-0.783387</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>-0.078454</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>1.644066</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>-0.020328</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>1.530615</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" data2\n",
"0 -0.783387\n",
"1 -0.078454\n",
"2 1.644066\n",
"3 -0.020328\n",
"4 1.530615"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['data2']]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"g1.count?"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>ア</td>\n",
" <td>0.542646</td>\n",
" <td>0.319061</td>\n",
" <td>-1.785315</td>\n",
" <td>1.968788</td>\n",
" <td>0.037417</td>\n",
" </tr>\n",
" <tr>\n",
" <td>イ</td>\n",
" <td>1.409337</td>\n",
" <td>0.481707</td>\n",
" <td>1.358458</td>\n",
" <td>-1.873930</td>\n",
" <td>-1.868467</td>\n",
" </tr>\n",
" <tr>\n",
" <td>ウ</td>\n",
" <td>0.772065</td>\n",
" <td>-0.758248</td>\n",
" <td>-0.029830</td>\n",
" <td>-0.545882</td>\n",
" <td>-0.402525</td>\n",
" </tr>\n",
" <tr>\n",
" <td>エ</td>\n",
" <td>-0.644325</td>\n",
" <td>0.369377</td>\n",
" <td>1.598141</td>\n",
" <td>1.643240</td>\n",
" <td>1.928777</td>\n",
" </tr>\n",
" <tr>\n",
" <td>オ</td>\n",
" <td>0.909540</td>\n",
" <td>1.360162</td>\n",
" <td>0.242817</td>\n",
" <td>1.691678</td>\n",
" <td>0.880182</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d e\n",
"ア 0.542646 0.319061 -1.785315 1.968788 0.037417\n",
"イ 1.409337 0.481707 1.358458 -1.873930 -1.868467\n",
"ウ 0.772065 -0.758248 -0.029830 -0.545882 -0.402525\n",
"エ -0.644325 0.369377 1.598141 1.643240 1.928777\n",
"オ 0.909540 1.360162 0.242817 1.691678 0.880182"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.random.randn(5,5), columns=list(\"abcde\"), index=list(\"アイウエオ\"))\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# df.groupby(lambda x: int(x.mean())).sum()\n",
"# df"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ア\n",
"イ\n",
"ウ\n",
"エ\n",
"オ\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [a, b, c, d, e]\n",
"Index: []"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(lambda x: print(x)).sum()"
]
},
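{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# df.groupby(lambda x: int(df[x].mean())).sum()\n",
"# Error: KeyError -- df[x] selects a column, but groupby calls the lambda with row index labels ('ア', 'イ', ...); use df.loc[x, :] instead"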
]
},
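{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# df['ア']\n",
"# Error: KeyError -- df[label] looks up a column, and 'ア' is a row index label; use df.loc['ア'] to select the row"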
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2.079723</td>\n",
" <td>0.411896</td>\n",
" <td>1.141453</td>\n",
" <td>1.192216</td>\n",
" <td>-0.304799</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.909540</td>\n",
" <td>1.360162</td>\n",
" <td>0.242817</td>\n",
" <td>1.691678</td>\n",
" <td>0.880182</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d e\n",
"0 2.079723 0.411896 1.141453 1.192216 -0.304799\n",
"1 0.909540 1.360162 0.242817 1.691678 0.880182"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(lambda x: int(df.loc[x,:].mean())).sum()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# df = pd.DataFrame(np.random.randn(100,5), columns=list(\"abcde\"), index=list(\"アイウエオ\"))\n",
"# Error: ValueError -- the index has only 5 labels but the data has 100 rows"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(np.random.randn(100,5), columns=list(\"abcde\"), index=range(100))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.056271</td>\n",
" <td>-0.268300</td>\n",
" <td>0.101382</td>\n",
" <td>-1.440264</td>\n",
" <td>0.258070</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.932806</td>\n",
" <td>-0.320192</td>\n",
" <td>0.951708</td>\n",
" <td>-0.545906</td>\n",
" <td>1.896548</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>-1.098495</td>\n",
" <td>-0.438470</td>\n",
" <td>0.125594</td>\n",
" <td>0.166621</td>\n",
" <td>-1.084580</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>-0.349849</td>\n",
" <td>0.245474</td>\n",
" <td>0.378100</td>\n",
" <td>2.195348</td>\n",
" <td>-3.408793</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>-1.914535</td>\n",
" <td>-0.134004</td>\n",
" <td>0.438810</td>\n",
" <td>-0.622418</td>\n",
" <td>-1.514950</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>95</td>\n",
" <td>-0.021858</td>\n",
" <td>1.723504</td>\n",
" <td>0.723022</td>\n",
" <td>1.274680</td>\n",
" <td>-0.045642</td>\n",
" </tr>\n",
" <tr>\n",
" <td>96</td>\n",
" <td>0.063260</td>\n",
" <td>-0.453309</td>\n",
" <td>-0.124663</td>\n",
" <td>0.482089</td>\n",
" <td>-0.286932</td>\n",
" </tr>\n",
" <tr>\n",
" <td>97</td>\n",
" <td>0.077643</td>\n",
" <td>-0.562259</td>\n",
" <td>-0.941159</td>\n",
" <td>-0.527117</td>\n",
" <td>-0.037437</td>\n",
" </tr>\n",
" <tr>\n",
" <td>98</td>\n",
" <td>0.821687</td>\n",
" <td>-0.068570</td>\n",
" <td>1.069523</td>\n",
" <td>-1.541264</td>\n",
" <td>0.340885</td>\n",
" </tr>\n",
" <tr>\n",
" <td>99</td>\n",
" <td>0.210404</td>\n",
" <td>0.056076</td>\n",
" <td>1.138890</td>\n",
" <td>-1.748428</td>\n",
" <td>0.760276</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" a b c d e\n",
"0 0.056271 -0.268300 0.101382 -1.440264 0.258070\n",
"1 0.932806 -0.320192 0.951708 -0.545906 1.896548\n",
"2 -1.098495 -0.438470 0.125594 0.166621 -1.084580\n",
"3 -0.349849 0.245474 0.378100 2.195348 -3.408793\n",
"4 -1.914535 -0.134004 0.438810 -0.622418 -1.514950\n",
".. ... ... ... ... ...\n",
"95 -0.021858 1.723504 0.723022 1.274680 -0.045642\n",
"96 0.063260 -0.453309 -0.124663 0.482089 -0.286932\n",
"97 0.077643 -0.562259 -0.941159 -0.527117 -0.037437\n",
"98 0.821687 -0.068570 1.069523 -1.541264 0.340885\n",
"99 0.210404 0.056076 1.138890 -1.748428 0.760276\n",
"\n",
"[100 rows x 5 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>-1</td>\n",
" <td>-5.316737</td>\n",
" <td>-4.398922</td>\n",
" <td>0.389717</td>\n",
" <td>-4.520742</td>\n",
" <td>-2.640314</td>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>1.905300</td>\n",
" <td>12.460386</td>\n",
" <td>-12.652848</td>\n",
" <td>-6.621081</td>\n",
" <td>-0.446914</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2.065343</td>\n",
" <td>0.643820</td>\n",
" <td>1.385855</td>\n",
" <td>1.598322</td>\n",
" <td>-0.453732</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d e\n",
"-1 -5.316737 -4.398922 0.389717 -4.520742 -2.640314\n",
" 0 1.905300 12.460386 -12.652848 -6.621081 -0.446914\n",
" 1 2.065343 0.643820 1.385855 1.598322 -0.453732"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(lambda x: int(df.loc[x,:].mean())).sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment