Skip to content

Instantly share code, notes, and snippets.

@keimina
Created May 6, 2020 09:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save keimina/a597aa4ff476fa79e46f96ea141123ef to your computer and use it in GitHub Desktop.
Save keimina/a597aa4ff476fa79e46f96ea141123ef to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"frame = pd.DataFrame(np.arange(12).reshape((4, 3)),index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"frame.index.names = ['key1', 'key2']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"frame.columns.names = ['state', 'color']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>state</th>\n",
" <th>Colorado</th>\n",
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>Green</th>\n",
" <th>Green</th>\n",
" <th>Red</th>\n",
" </tr>\n",
" <tr>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\">a</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\">b</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"state Colorado Ohio \n",
"color Green Green Red\n",
"key1 key2 \n",
"a 1 2 0 1\n",
" 2 5 3 4\n",
"b 1 8 6 7\n",
" 2 11 9 10"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame.sort_index(level='color', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>state</th>\n",
" <th>Colorado</th>\n",
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>color</th>\n",
" <th>Green</th>\n",
" <th>Green</th>\n",
" <th>Red</th>\n",
" </tr>\n",
" <tr>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\">a</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <td rowspan=\"2\" valign=\"top\">b</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"state Colorado Ohio \n",
"color Green Green Red\n",
"key1 key2 \n",
"a 1 2 0 1\n",
" 2 5 3 4\n",
"b 1 8 6 7\n",
" 2 11 9 10"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"frame.sort_index(level=('color', 'state'), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# left:\n",
"# (foo,bar) x (one, two) = (foo, one), (foo, two), (bar, one), (one, two)\n",
"# right:\n",
"# (foo,bar) x (one, two) = (foo, one), (foo, two), (bar, one), (one, two)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"left = pd.DataFrame({'key1': ['foo', 'foo', 'bar'], 'key2': ['one', 'two', 'one'],'lval': [1, 2, 3]})"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"right = pd.DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'], 'key2': ['one', 'one', 'one', 'two'],'rval': [4, 5, 6, 7]})"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>lval</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>foo</td>\n",
" <td>two</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 lval\n",
"0 foo one 1\n",
"1 foo two 2\n",
"2 bar one 3"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"left"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>rval</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>bar</td>\n",
" <td>two</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 rval\n",
"0 foo one 4\n",
"1 foo one 5\n",
"2 bar one 6\n",
"3 bar two 7"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"right"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>lval</th>\n",
" <th>rval</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>foo</td>\n",
" <td>two</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>bar</td>\n",
" <td>two</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 lval rval\n",
"0 foo one 1.0 4.0\n",
"1 foo one 1.0 5.0\n",
"2 foo two 2.0 NaN\n",
"3 bar one 3.0 6.0\n",
"4 bar two NaN 7.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=False)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th>lval</th>\n",
" <th>rval</th>\n",
" <th>_merge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>both</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>foo</td>\n",
" <td>one</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>both</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>foo</td>\n",
" <td>two</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>bar</td>\n",
" <td>one</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>both</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>bar</td>\n",
" <td>two</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" key1 key2 lval rval _merge\n",
"0 foo one 1.0 4.0 both\n",
"1 foo one 1.0 5.0 both\n",
"2 foo two 2.0 NaN left_only\n",
"3 bar one 3.0 6.0 both\n",
"4 bar two NaN 7.0 right_only"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=True)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# p259まで\n",
"# 次回は、P259 「8.2.3 軸に沿った連結」\n",
"# 次回、開催時間は、 18:00 - 20:00\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment