gitgithan/Exploring stack, melt, pivot_table.ipynb

## Exploring stack, melt, pivot_table.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "#----------------Transformation pipelines--------------------\n",
    "#1. df-->stack-->reset_index-->pivot-->df\n",
    "#2. df-->stack-->convert to dataframe-->pivot-->df\n",
    "#3. df-->melt-->add index-->pivot-->df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. df-->stack-->reset_index-->pivot-->df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "***********************************dataframe************************************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>weight</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     weight  height\n",
       "cat       0       1\n",
       "dog       2       3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*******************************stacked dataframe********************************\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "cat  weight    0\n",
       "     height    1\n",
       "dog  weight    2\n",
       "     height    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*********************stacked dataframe with index in column*********************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level_1</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>weight</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>height</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>weight</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>height</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    level_1  0\n",
       "cat  weight  0\n",
       "cat  height  1\n",
       "dog  weight  2\n",
       "dog  height  3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*****pivot_table recovered original dataframe (with extra name for columns)*****\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     height  weight\n",
       "cat       1       0\n",
       "dog       3       2"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df= pd.DataFrame([[0, 1], [2, 3]],index=['cat', 'dog'],columns=['weight', 'height'])\n",
    "\n",
    "print('{:*^80}'.format('dataframe'))\n",
    "df\n",
    "\n",
    "print('{:*^80}'.format('stacked dataframe'))\n",
    "df.stack()\n",
    "\n",
    "print('{:*^80}'.format('stacked dataframe with index in column'))\n",
    "df.stack().reset_index(level = 1)   #AttributeError: 'Series' object has no attribute 'pivot_table' , so must convert to dataframe before pivot_table\n",
    "stacked = df.stack().reset_index(level = 1)  \n",
    "\n",
    "print('{:*^80}'.format('pivot_table recovered original dataframe (with extra name for columns)'))\n",
    "recovered_df1 = df.stack().reset_index(level = 1).pivot_table(index = stacked.index, columns = 'level_1',values = 0) #pivot_table orders columns alphabetically,specifying values parameter prevents creation of useless multiindex column \n",
    "recovered_df1.columns.name = None  #remove 'level_1' column.name\n",
    "recovered_df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. df-->stack-->convert to dataframe-->pivot-->df "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.indexes.base.Index"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(stacked_df.index.get_level_values(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "***********************************dataframe************************************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>weight</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     weight  height\n",
       "cat       0       1\n",
       "dog       2       3"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "**************stack and convert to dataframe to expose pivot_table**************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">cat</th>\n",
       "      <th>weight</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>height</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">dog</th>\n",
       "      <th>weight</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>height</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            0\n",
       "cat weight  0\n",
       "    height  1\n",
       "dog weight  2\n",
       "    height  3"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "***************rather than unstack, pivot_table achieves the same***************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     height  weight\n",
       "cat       1       0\n",
       "dog       3       2"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print('{:*^80}'.format('dataframe'))\n",
    "df\n",
    "print('{:*^80}'.format('stack and convert to dataframe to expose pivot_table'))\n",
    "stacked_df = pd.DataFrame(df.stack())\n",
    "stacked_df\n",
    "\n",
    "print('{:*^80}'.format('rather than unstack, pivot_table achieves the same'))\n",
    "idx_lvl0, idx_lvl1 = stacked_df.index.get_level_values(0), stacked_df.index.get_level_values(1)\n",
    "recovered_df2 = stacked_df.pivot_table(index=idx_lvl0,columns = idx_lvl1,values = 0)\n",
    "recovered_df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. df-->melt-->add index-->pivot-->df "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "***********************************dataframe************************************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>weight</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     weight  height\n",
       "cat       0       1\n",
       "dog       2       3"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "************************melting loses index information*************************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>weight</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>weight</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>height</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>height</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  variable  value\n",
       "0   weight      0\n",
       "1   weight      2\n",
       "2   height      1\n",
       "3   height      3"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*****************************manually enrich index******************************\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>weight</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>weight</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>height</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>height</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    variable  value\n",
       "cat   weight      0\n",
       "dog   weight      2\n",
       "cat   height      1\n",
       "dog   height      3"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "*****pivot_table recovered original dataframe (with extra name for columns)*****\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>height</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>cat</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dog</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     height  weight\n",
       "cat       1       0\n",
       "dog       3       2"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print('{:*^80}'.format('dataframe'))\n",
    "df\n",
    "print('{:*^80}'.format('melting loses index information'))\n",
    "melted = df.melt()   #melt appends columns into new \"variable\" column, while stack adds columns to new inner index layer (same information end up different places)\n",
    "melted\n",
    "\n",
    "print('{:*^80}'.format('manually enrich index')) # until this is solved: https://github.com/pandas-dev/pandas/issues/17440\n",
    "melted.index = ['cat','dog']*2   #list(df.index)*len(df.columns) for more generalizable index generation\n",
    "melted                           \n",
    "\n",
    "print('{:*^80}'.format('pivot_table recovered original dataframe (with extra name for columns)'))\n",
    "recovered_df3 = melted.pivot_table(index = melted.index, columns = 'variable',values = 'value')\n",
    "recovered_df3.columns.name=None  #remove 'variable' column.name\n",
    "recovered_df3\n",
    "\n",
    "#melting loses index while pivot_table requires index parameter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"from IPython.core.interactiveshell import InteractiveShell\n",
	"InteractiveShell.ast_node_interactivity = \"all\"\n",
	"\n",
	"import pandas as pd\n",
	"\n",
	"#----------------Transformation pipelines--------------------\n",
	"#1. df-->stack-->reset_index-->pivot-->df\n",
	"#2. df-->stack-->convert to dataframe-->pivot-->df\n",
	"#3. df-->melt-->add index-->pivot-->df"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# 1. df-->stack-->reset_index-->pivot-->df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*********************************dataframe**********************************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>weight</th>\n",
	" <th>height</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" weight height\n",
	"cat 0 1\n",
	"dog 2 3"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*****************************stacked dataframe******************************\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"cat weight 0\n",
	" height 1\n",
	"dog weight 2\n",
	" height 3\n",
	"dtype: int64"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*******************stacked dataframe with index in column*******************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>level_1</th>\n",
	" <th>0</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>weight</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>height</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>weight</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>height</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" level_1 0\n",
	"cat weight 0\n",
	"cat height 1\n",
	"dog weight 2\n",
	"dog height 3"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"***pivot_table recovered original dataframe (with extra name for columns)***\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>height</th>\n",
	" <th>weight</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" height weight\n",
	"cat 1 0\n",
	"dog 3 2"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df= pd.DataFrame([[0, 1], [2, 3]],index=['cat', 'dog'],columns=['weight', 'height'])\n",
	"\n",
	"print('{:*^80}'.format('dataframe'))\n",
	"df\n",
	"\n",
	"print('{:*^80}'.format('stacked dataframe'))\n",
	"df.stack()\n",
	"\n",
	"print('{:*^80}'.format('stacked dataframe with index in column'))\n",
	"df.stack().reset_index(level = 1) #AttributeError: 'Series' object has no attribute 'pivot_table' , so must convert to dataframe before pivot_table\n",
	"stacked = df.stack().reset_index(level = 1) \n",
	"\n",
	"print('{:*^80}'.format('pivot_table recovered original dataframe (with extra name for columns)'))\n",
	"recovered_df1 = df.stack().reset_index(level = 1).pivot_table(index = stacked.index, columns = 'level_1',values = 0) #pivot_table orders columns alphabetically,specifying values parameter prevents creation of useless multiindex column \n",
	"recovered_df1.columns.name = None #remove 'level_1' column.name\n",
	"recovered_df1"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# 2. df-->stack-->convert to dataframe-->pivot-->df "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"pandas.core.indexes.base.Index"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"type(stacked_df.index.get_level_values(0))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*********************************dataframe**********************************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>weight</th>\n",
	" <th>height</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" weight height\n",
	"cat 0 1\n",
	"dog 2 3"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"************stack and convert to dataframe to expose pivot_table************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th>0</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th rowspan=\"2\" valign=\"top\">cat</th>\n",
	" <th>weight</th>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>height</th>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th rowspan=\"2\" valign=\"top\">dog</th>\n",
	" <th>weight</th>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>height</th>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" 0\n",
	"cat weight 0\n",
	" height 1\n",
	"dog weight 2\n",
	" height 3"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*************rather than unstack, pivot_table achieves the same*************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>height</th>\n",
	" <th>weight</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" height weight\n",
	"cat 1 0\n",
	"dog 3 2"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"print('{:*^80}'.format('dataframe'))\n",
	"df\n",
	"print('{:*^80}'.format('stack and convert to dataframe to expose pivot_table'))\n",
	"stacked_df = pd.DataFrame(df.stack())\n",
	"stacked_df\n",
	"\n",
	"print('{:*^80}'.format('rather than unstack, pivot_table achieves the same'))\n",
	"idx_lvl0, idx_lvl1 = stacked_df.index.get_level_values(0), stacked_df.index.get_level_values(1)\n",
	"recovered_df2 = stacked_df.pivot_table(index=idx_lvl0,columns = idx_lvl1,values = 0)\n",
	"recovered_df2"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# 3. df-->melt-->add index-->pivot-->df "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"*********************************dataframe**********************************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>weight</th>\n",
	" <th>height</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" weight height\n",
	"cat 0 1\n",
	"dog 2 3"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"**********************melting loses index information***********************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>variable</th>\n",
	" <th>value</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>weight</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>weight</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>height</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>height</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" variable value\n",
	"0 weight 0\n",
	"1 weight 2\n",
	"2 height 1\n",
	"3 height 3"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"***************************manually enrich index****************************\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>variable</th>\n",
	" <th>value</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>weight</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>weight</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>height</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>height</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" variable value\n",
	"cat weight 0\n",
	"dog weight 2\n",
	"cat height 1\n",
	"dog height 3"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"***pivot_table recovered original dataframe (with extra name for columns)***\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>height</th>\n",
	" <th>weight</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>cat</th>\n",
	" <td>1</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>dog</th>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" height weight\n",
	"cat 1 0\n",
	"dog 3 2"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"print('{:*^80}'.format('dataframe'))\n",
	"df\n",
	"print('{:*^80}'.format('melting loses index information'))\n",
	"melted = df.melt() #melt appends columns into new \"variable\" column, while stack adds columns to new inner index layer (same information end up different places)\n",
	"melted\n",
	"\n",
	"print('{:*^80}'.format('manually enrich index')) # until this is solved: https://github.com/pandas-dev/pandas/issues/17440\n",
	"melted.index = ['cat','dog']2 #list(df.index)len(df.columns) for more generalizable index generation\n",
	"melted \n",
	"\n",
	"print('{:*^80}'.format('pivot_table recovered original dataframe (with extra name for columns)'))\n",
	"recovered_df3 = melted.pivot_table(index = melted.index, columns = 'variable',values = 'value')\n",
	"recovered_df3.columns.name=None #remove 'variable' column.name\n",
	"recovered_df3\n",
	"\n",
	"#melting loses index while pivot_table requires index parameter"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}