-
-
Save jbencook/5c2fbb4bd680b32f40130f7198326eeb to your computer and use it in GitHub Desktop.
Create a long-format dummy dataset, then pivot it to wide format and unpivot it back
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import datetime as dt\n", | |
"import random" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td>1999-01-03</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>306</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>1999-01-04</td>\n", | |
" <td>EMEA</td>\n", | |
" <td>236</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1999-01-05</td>\n", | |
" <td>AMER</td>\n", | |
" <td>835</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>1999-01-09</td>\n", | |
" <td>AMER</td>\n", | |
" <td>225</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999-01-10</td>\n", | |
" <td>AMER</td>\n", | |
" <td>684</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region revenue\n", | |
"25 1999-01-03 EMEA 306\n", | |
"18 1999-01-04 EMEA 236\n", | |
"4 1999-01-05 AMER 835\n", | |
"8 1999-01-09 AMER 225\n", | |
"0 1999-01-10 AMER 684" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Create a DataFrame with fake sales data for Q1 1999\n", | |
"dates = []\n", | |
"regions = []\n", | |
"revenues = []\n", | |
"\n", | |
"days = {1:31, 2:28, 3:31}\n", | |
"for i in range(30):\n", | |
" month = random.choice([1, 2, 3])\n", | |
" dates.append(dt.date(1999, month, random.randint(1, days[month])))\n", | |
" regions.append(random.choice([\"AMER\", \"EMEA\", \"APAC\"]))\n", | |
" revenues.append(random.randint(100, 999))\n", | |
"\n", | |
"df = pd.DataFrame({\n", | |
" 'date': dates,\n", | |
" 'region': regions,\n", | |
" 'revenue': revenues\n", | |
"}).sort_values(['date', 'region'])\n", | |
"\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>region</th>\n", | |
" <th>AMER</th>\n", | |
" <th>APAC</th>\n", | |
" <th>EMEA</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1999-01-03</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>306</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1999-01-04</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>236</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1999-01-05</th>\n", | |
" <td>835</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1999-01-09</th>\n", | |
" <td>225</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1999-01-10</th>\n", | |
" <td>684</td>\n", | |
" <td>0</td>\n", | |
" <td>257</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"region AMER APAC EMEA\n", | |
"date \n", | |
"1999-01-03 0 0 306\n", | |
"1999-01-04 0 0 236\n", | |
"1999-01-05 835 0 0\n", | |
"1999-01-09 225 0 0\n", | |
"1999-01-10 684 0 257" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Pivot to get total daily revenue per region\n", | |
"wide_df = df.pivot_table(\n", | |
" values='revenue',\n", | |
" index='date',\n", | |
" columns='region',\n", | |
" aggfunc='sum',\n", | |
" fill_value=0,\n", | |
")\n", | |
"\n", | |
"wide_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date</th>\n", | |
" <th>region</th>\n", | |
" <th>total_revenue</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1999-01-03</td>\n", | |
" <td>AMER</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1999-01-04</td>\n", | |
" <td>AMER</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1999-01-05</td>\n", | |
" <td>AMER</td>\n", | |
" <td>835</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1999-01-09</td>\n", | |
" <td>AMER</td>\n", | |
" <td>225</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1999-01-10</td>\n", | |
" <td>AMER</td>\n", | |
" <td>684</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date region total_revenue\n", | |
"0 1999-01-03 AMER 0\n", | |
"1 1999-01-04 AMER 0\n", | |
"2 1999-01-05 AMER 835\n", | |
"3 1999-01-09 AMER 225\n", | |
"4 1999-01-10 AMER 684" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wide_df = wide_df.reset_index()\n", | |
"long_df = wide_df.melt(\n", | |
" id_vars='date',\n", | |
" value_vars=['AMER', 'APAC', 'EMEA'],\n", | |
" value_name='total_revenue',\n", | |
")\n", | |
"\n", | |
"long_df.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment