Skip to content

Instantly share code, notes, and snippets.

@vinaykudari
Created August 22, 2018 19:46
Show Gist options
  • Save vinaykudari/9f28990df4ac94928fd0a48c289087bf to your computer and use it in GitHub Desktop.
Save vinaykudari/9f28990df4ac94928fd0a48c289087bf to your computer and use it in GitHub Desktop.
Joins
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"import sqlalchemy as db\n",
"import pandas as pd\n",
"\n",
"engine = db.create_engine('sqlite:///census.sqlite')\n",
"connection = engine.connect()\n",
"metadata = db.MetaData()\n",
"\n",
"census = db.Table('census', metadata, autoload=True, autoload_with=engine)\n",
"state_fact = db.Table('state_fact', metadata, autoload=True, autoload_with=engine)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Automatic Join"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>pop2000</th>\n",
" <th>pop2008</th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>abbreviation</th>\n",
" <th>country</th>\n",
" <th>type</th>\n",
" <th>...</th>\n",
" <th>occupied</th>\n",
" <th>notes</th>\n",
" <th>fips_state</th>\n",
" <th>assoc_press</th>\n",
" <th>standard_federal_region</th>\n",
" <th>census_region</th>\n",
" <th>census_region_name</th>\n",
" <th>census_division</th>\n",
" <th>census_division_name</th>\n",
" <th>circuit_court</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>0</td>\n",
" <td>89600</td>\n",
" <td>95012</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>1</td>\n",
" <td>88445</td>\n",
" <td>91829</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>2</td>\n",
" <td>88729</td>\n",
" <td>89547</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>3</td>\n",
" <td>88868</td>\n",
" <td>90037</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>4</td>\n",
" <td>91947</td>\n",
" <td>91111</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" state sex age pop2000 pop2008 id name abbreviation country \\\n",
"0 Illinois M 0 89600 95012 13 Illinois IL USA \n",
"1 Illinois M 1 88445 91829 13 Illinois IL USA \n",
"2 Illinois M 2 88729 89547 13 Illinois IL USA \n",
"3 Illinois M 3 88868 90037 13 Illinois IL USA \n",
"4 Illinois M 4 91947 91111 13 Illinois IL USA \n",
"\n",
" type ... occupied notes fips_state assoc_press \\\n",
"0 state ... occupied 17 Ill. \n",
"1 state ... occupied 17 Ill. \n",
"2 state ... occupied 17 Ill. \n",
"3 state ... occupied 17 Ill. \n",
"4 state ... occupied 17 Ill. \n",
"\n",
" standard_federal_region census_region census_region_name census_division \\\n",
"0 V 2 Midwest 3 \n",
"1 V 2 Midwest 3 \n",
"2 V 2 Midwest 3 \n",
"3 V 2 Midwest 3 \n",
"4 V 2 Midwest 3 \n",
"\n",
" census_division_name circuit_court \n",
"0 East North Central 7 \n",
"1 East North Central 7 \n",
"2 East North Central 7 \n",
"3 East North Central 7 \n",
"4 East North Central 7 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = db.select([census.columns.pop2008, state_fact.columns.abbreviation])\n",
"result = connection.execute(query).fetchall()\n",
"df = pd.DataFrame(results)\n",
"df.columns = results[0].keys()\n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Manual Join"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>pop2000</th>\n",
" <th>pop2008</th>\n",
" <th>id</th>\n",
" <th>name</th>\n",
" <th>abbreviation</th>\n",
" <th>country</th>\n",
" <th>type</th>\n",
" <th>...</th>\n",
" <th>occupied</th>\n",
" <th>notes</th>\n",
" <th>fips_state</th>\n",
" <th>assoc_press</th>\n",
" <th>standard_federal_region</th>\n",
" <th>census_region</th>\n",
" <th>census_region_name</th>\n",
" <th>census_division</th>\n",
" <th>census_division_name</th>\n",
" <th>circuit_court</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>0</td>\n",
" <td>89600</td>\n",
" <td>95012</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>1</td>\n",
" <td>88445</td>\n",
" <td>91829</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>2</td>\n",
" <td>88729</td>\n",
" <td>89547</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>3</td>\n",
" <td>88868</td>\n",
" <td>90037</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Illinois</td>\n",
" <td>M</td>\n",
" <td>4</td>\n",
" <td>91947</td>\n",
" <td>91111</td>\n",
" <td>13</td>\n",
" <td>Illinois</td>\n",
" <td>IL</td>\n",
" <td>USA</td>\n",
" <td>state</td>\n",
" <td>...</td>\n",
" <td>occupied</td>\n",
" <td></td>\n",
" <td>17</td>\n",
" <td>Ill.</td>\n",
" <td>V</td>\n",
" <td>2</td>\n",
" <td>Midwest</td>\n",
" <td>3</td>\n",
" <td>East North Central</td>\n",
" <td>7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" state sex age pop2000 pop2008 id name abbreviation country \\\n",
"0 Illinois M 0 89600 95012 13 Illinois IL USA \n",
"1 Illinois M 1 88445 91829 13 Illinois IL USA \n",
"2 Illinois M 2 88729 89547 13 Illinois IL USA \n",
"3 Illinois M 3 88868 90037 13 Illinois IL USA \n",
"4 Illinois M 4 91947 91111 13 Illinois IL USA \n",
"\n",
" type ... occupied notes fips_state assoc_press \\\n",
"0 state ... occupied 17 Ill. \n",
"1 state ... occupied 17 Ill. \n",
"2 state ... occupied 17 Ill. \n",
"3 state ... occupied 17 Ill. \n",
"4 state ... occupied 17 Ill. \n",
"\n",
" standard_federal_region census_region census_region_name census_division \\\n",
"0 V 2 Midwest 3 \n",
"1 V 2 Midwest 3 \n",
"2 V 2 Midwest 3 \n",
"3 V 2 Midwest 3 \n",
"4 V 2 Midwest 3 \n",
"\n",
" census_division_name circuit_court \n",
"0 East North Central 7 \n",
"1 East North Central 7 \n",
"2 East North Central 7 \n",
"3 East North Central 7 \n",
"4 East North Central 7 \n",
"\n",
"[5 rows x 22 columns]"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = db.select([census, state_fact])\n",
"query = query.select_from(census.join(state_fact, census.columns.state == state_fact.columns.name))\n",
"results = connection.execute(query).fetchall()\n",
"df = pd.DataFrame(results)\n",
"df.columns = results[0].keys()\n",
"df.head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment