sureshsarda/ExplodingAColumnIntoMultipleCells.ipynb

## ExplodingAColumnIntoMultipleCells.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>EmployeeId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Mumbai|Bangalore</td>\n",
       "      <td>001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Pune|Mumbai|Delhi</td>\n",
       "      <td>002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mumbai|Bangalore</td>\n",
       "      <td>003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Mumbai|Pune</td>\n",
       "      <td>004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bangalore</td>\n",
       "      <td>005</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                City EmployeeId\n",
       "0   Mumbai|Bangalore        001\n",
       "1  Pune|Mumbai|Delhi        002\n",
       "2   Mumbai|Bangalore        003\n",
       "3        Mumbai|Pune        004\n",
       "4          Bangalore        005"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\n",
    "   'EmployeeId': ['001', '002', '003', '004', '005'],\n",
    "   'City': ['Mumbai|Bangalore', 'Pune|Mumbai|Delhi', 'Mumbai|Bangalore', 'Mumbai|Pune', 'Bangalore'] \n",
    "})\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "EmployeeId   \n",
       "001         0       Mumbai\n",
       "            1    Bangalore\n",
       "002         0         Pune\n",
       "            1       Mumbai\n",
       "            2        Delhi\n",
       "003         0       Mumbai\n",
       "            1    Bangalore\n",
       "004         0       Mumbai\n",
       "            1         Pune\n",
       "005         0    Bangalore\n",
       "dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Exploding into multiple cells\n",
    "\n",
    "# We start with creating a new dataframe from the series with EmployeeId as the index\n",
    "new_df = pd.DataFrame(df.City.str.split('|').tolist(), index=df.EmployeeId).stack()\n",
    "new_df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>EmployeeId</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>001</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>001</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>002</td>\n",
       "      <td>Pune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>002</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>002</td>\n",
       "      <td>Delhi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>003</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>003</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>004</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>004</td>\n",
       "      <td>Pune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>005</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  EmployeeId          0\n",
       "0        001     Mumbai\n",
       "1        001  Bangalore\n",
       "2        002       Pune\n",
       "3        002     Mumbai\n",
       "4        002      Delhi\n",
       "5        003     Mumbai\n",
       "6        003  Bangalore\n",
       "7        004     Mumbai\n",
       "8        004       Pune\n",
       "9        005  Bangalore"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We now want to get rid of the secondary index\n",
    "# To do this, we will make EmployeeId as a column (it can't be an index since the values will be duplicate)\n",
    "new_df = new_df.reset_index([0, 'EmployeeId'])\n",
    "new_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>EmployeeId</th>\n",
       "      <th>City</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>001</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>001</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>002</td>\n",
       "      <td>Pune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>002</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>002</td>\n",
       "      <td>Delhi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>003</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>003</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>004</td>\n",
       "      <td>Mumbai</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>004</td>\n",
       "      <td>Pune</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>005</td>\n",
       "      <td>Bangalore</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  EmployeeId       City\n",
       "0        001     Mumbai\n",
       "1        001  Bangalore\n",
       "2        002       Pune\n",
       "3        002     Mumbai\n",
       "4        002      Delhi\n",
       "5        003     Mumbai\n",
       "6        003  Bangalore\n",
       "7        004     Mumbai\n",
       "8        004       Pune\n",
       "9        005  Bangalore"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The final step is to set the column names as we want them\n",
    "new_df.columns = ['EmployeeId', 'City']\n",
    "new_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>City</th>\n",
	" <th>EmployeeId</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>Mumbai\|Bangalore</td>\n",
	" <td>001</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Pune\|Mumbai\|Delhi</td>\n",
	" <td>002</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Mumbai\|Bangalore</td>\n",
	" <td>003</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>Mumbai\|Pune</td>\n",
	" <td>004</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>Bangalore</td>\n",
	" <td>005</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" City EmployeeId\n",
	"0 Mumbai\|Bangalore 001\n",
	"1 Pune\|Mumbai\|Delhi 002\n",
	"2 Mumbai\|Bangalore 003\n",
	"3 Mumbai\|Pune 004\n",
	"4 Bangalore 005"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import pandas as pd\n",
	"\n",
	"df = pd.DataFrame({\n",
	" 'EmployeeId': ['001', '002', '003', '004', '005'],\n",
	" 'City': ['Mumbai\|Bangalore', 'Pune\|Mumbai\|Delhi', 'Mumbai\|Bangalore', 'Mumbai\|Pune', 'Bangalore'] \n",
	"})\n",
	"\n",
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"EmployeeId \n",
	"001 0 Mumbai\n",
	" 1 Bangalore\n",
	"002 0 Pune\n",
	" 1 Mumbai\n",
	" 2 Delhi\n",
	"003 0 Mumbai\n",
	" 1 Bangalore\n",
	"004 0 Mumbai\n",
	" 1 Pune\n",
	"005 0 Bangalore\n",
	"dtype: object"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Exploding into multiple cells\n",
	"\n",
	"# We start with creating a new dataframe from the series with EmployeeId as the index\n",
	"new_df = pd.DataFrame(df.City.str.split('\|').tolist(), index=df.EmployeeId).stack()\n",
	"new_df\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>EmployeeId</th>\n",
	" <th>0</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>001</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>001</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>002</td>\n",
	" <td>Pune</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>002</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>002</td>\n",
	" <td>Delhi</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>003</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>003</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>004</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>004</td>\n",
	" <td>Pune</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>005</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" EmployeeId 0\n",
	"0 001 Mumbai\n",
	"1 001 Bangalore\n",
	"2 002 Pune\n",
	"3 002 Mumbai\n",
	"4 002 Delhi\n",
	"5 003 Mumbai\n",
	"6 003 Bangalore\n",
	"7 004 Mumbai\n",
	"8 004 Pune\n",
	"9 005 Bangalore"
	]
	},
	"execution_count": 23,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# We now want to get rid of the secondary index\n",
	"# To do this, we will make EmployeeId as a column (it can't be an index since the values will be duplicate)\n",
	"new_df = new_df.reset_index([0, 'EmployeeId'])\n",
	"new_df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>EmployeeId</th>\n",
	" <th>City</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>001</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>001</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>002</td>\n",
	" <td>Pune</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>002</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>002</td>\n",
	" <td>Delhi</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>003</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>003</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>004</td>\n",
	" <td>Mumbai</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>004</td>\n",
	" <td>Pune</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>005</td>\n",
	" <td>Bangalore</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" EmployeeId City\n",
	"0 001 Mumbai\n",
	"1 001 Bangalore\n",
	"2 002 Pune\n",
	"3 002 Mumbai\n",
	"4 002 Delhi\n",
	"5 003 Mumbai\n",
	"6 003 Bangalore\n",
	"7 004 Mumbai\n",
	"8 004 Pune\n",
	"9 005 Bangalore"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# The final step is to set the column names as we want them\n",
	"new_df.columns = ['EmployeeId', 'City']\n",
	"new_df"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}