Skip to content

Instantly share code, notes, and snippets.

@sureshsarda
Created November 21, 2018 03:00
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save sureshsarda/00c3b7423ea7b6cba4250a719d6b7424 to your computer and use it in GitHub Desktop.
Save sureshsarda/00c3b7423ea7b6cba4250a719d6b7424 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>City</th>\n",
" <th>EmployeeId</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Mumbai|Bangalore</td>\n",
" <td>001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Pune|Mumbai|Delhi</td>\n",
" <td>002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Mumbai|Bangalore</td>\n",
" <td>003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Mumbai|Pune</td>\n",
" <td>004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Bangalore</td>\n",
" <td>005</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" City EmployeeId\n",
"0 Mumbai|Bangalore 001\n",
"1 Pune|Mumbai|Delhi 002\n",
"2 Mumbai|Bangalore 003\n",
"3 Mumbai|Pune 004\n",
"4 Bangalore 005"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'EmployeeId': ['001', '002', '003', '004', '005'],\n",
" 'City': ['Mumbai|Bangalore', 'Pune|Mumbai|Delhi', 'Mumbai|Bangalore', 'Mumbai|Pune', 'Bangalore'] \n",
"})\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"EmployeeId \n",
"001 0 Mumbai\n",
" 1 Bangalore\n",
"002 0 Pune\n",
" 1 Mumbai\n",
" 2 Delhi\n",
"003 0 Mumbai\n",
" 1 Bangalore\n",
"004 0 Mumbai\n",
" 1 Pune\n",
"005 0 Bangalore\n",
"dtype: object"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Exploding into multiple cells\n",
"\n",
"# We start with creating a new dataframe from the series with EmployeeId as the index\n",
"new_df = pd.DataFrame(df.City.str.split('|').tolist(), index=df.EmployeeId).stack()\n",
"new_df\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>EmployeeId</th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>001</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>001</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>002</td>\n",
" <td>Pune</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>002</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>002</td>\n",
" <td>Delhi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>003</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>003</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>004</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>004</td>\n",
" <td>Pune</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>005</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" EmployeeId 0\n",
"0 001 Mumbai\n",
"1 001 Bangalore\n",
"2 002 Pune\n",
"3 002 Mumbai\n",
"4 002 Delhi\n",
"5 003 Mumbai\n",
"6 003 Bangalore\n",
"7 004 Mumbai\n",
"8 004 Pune\n",
"9 005 Bangalore"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We now want to get rid of the secondary index\n",
"# To do this, we will make EmployeeId as a column (it can't be an index since the values will be duplicate)\n",
"new_df = new_df.reset_index([0, 'EmployeeId'])\n",
"new_df"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>EmployeeId</th>\n",
" <th>City</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>001</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>001</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>002</td>\n",
" <td>Pune</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>002</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>002</td>\n",
" <td>Delhi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>003</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>003</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>004</td>\n",
" <td>Mumbai</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>004</td>\n",
" <td>Pune</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>005</td>\n",
" <td>Bangalore</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" EmployeeId City\n",
"0 001 Mumbai\n",
"1 001 Bangalore\n",
"2 002 Pune\n",
"3 002 Mumbai\n",
"4 002 Delhi\n",
"5 003 Mumbai\n",
"6 003 Bangalore\n",
"7 004 Mumbai\n",
"8 004 Pune\n",
"9 005 Bangalore"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The final step is to set the column names as we want them\n",
"new_df.columns = ['EmployeeId', 'City']\n",
"new_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment