pybokeh/pandas_merge.ipynb

## pandas_merge.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "FileA = pd.DataFrame({'ID':[1,2,1,3],'Name':['Bob','Sue','Joe','Fred']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Bob</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Sue</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Joe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Fred</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID  Name\n",
       "0   1   Bob\n",
       "1   2   Sue\n",
       "2   1   Joe\n",
       "3   3  Fred"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FileA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "FileB = pd.DataFrame({'ID':[1,2,3],\n",
    "                      'Address':['here','there','everywhere']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Address</th>\n",
       "      <th>ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>here</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>there</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>everywhere</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Address  ID\n",
       "0        here   1\n",
       "1       there   2\n",
       "2  everywhere   3"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FileB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "merged = pd.merge(FileA, FileB, how='inner', on='ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Name</th>\n",
       "      <th>Address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Bob</td>\n",
       "      <td>here</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Joe</td>\n",
       "      <td>here</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>Sue</td>\n",
       "      <td>there</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>Fred</td>\n",
       "      <td>everywhere</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID  Name     Address\n",
       "0   1   Bob        here\n",
       "1   1   Joe        here\n",
       "2   2   Sue       there\n",
       "3   3  Fred  everywhere"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on function merge in module pandas.tools.merge:\n",
      "\n",
      "merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False)\n",
      "    Merge DataFrame objects by performing a database-style join operation by\n",
      "    columns or indexes.\n",
      "    \n",
      "    If joining columns on columns, the DataFrame indexes *will be\n",
      "    ignored*. Otherwise if joining indexes on indexes or indexes on a column or\n",
      "    columns, the index will be passed on.\n",
      "    \n",
      "    Parameters\n",
      "    ----------\n",
      "    left : DataFrame\n",
      "    right : DataFrame\n",
      "    how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n",
      "        * left: use only keys from left frame (SQL: left outer join)\n",
      "        * right: use only keys from right frame (SQL: right outer join)\n",
      "        * outer: use union of keys from both frames (SQL: full outer join)\n",
      "        * inner: use intersection of keys from both frames (SQL: inner join)\n",
      "    on : label or list\n",
      "        Field names to join on. Must be found in both DataFrames. If on is\n",
      "        None and not merging on indexes, then it merges on the intersection of\n",
      "        the columns by default.\n",
      "    left_on : label or list, or array-like\n",
      "        Field names to join on in left DataFrame. Can be a vector or list of\n",
      "        vectors of the length of the DataFrame to use a particular vector as\n",
      "        the join key instead of columns\n",
      "    right_on : label or list, or array-like\n",
      "        Field names to join on in right DataFrame or vector/list of vectors per\n",
      "        left_on docs\n",
      "    left_index : boolean, default False\n",
      "        Use the index from the left DataFrame as the join key(s). If it is a\n",
      "        MultiIndex, the number of keys in the other DataFrame (either the index\n",
      "        or a number of columns) must match the number of levels\n",
      "    right_index : boolean, default False\n",
      "        Use the index from the right DataFrame as the join key. Same caveats as\n",
      "        left_index\n",
      "    sort : boolean, default False\n",
      "        Sort the join keys lexicographically in the result DataFrame\n",
      "    suffixes : 2-length sequence (tuple, list, ...)\n",
      "        Suffix to apply to overlapping column names in the left and right\n",
      "        side, respectively\n",
      "    copy : boolean, default True\n",
      "        If False, do not copy data unnecessarily\n",
      "    indicator : boolean or string, default False\n",
      "        If True, adds a column to output DataFrame called \"_merge\" with\n",
      "        information on the source of each row.\n",
      "        If string, column with information on source of each row will be added to\n",
      "        output DataFrame, and column will be named value of string.\n",
      "        Information column is Categorical-type and takes on a value of \"left_only\"\n",
      "        for observations whose merge key only appears in 'left' DataFrame,\n",
      "        \"right_only\" for observations whose merge key only appears in 'right'\n",
      "        DataFrame, and \"both\" if the observation's merge key is found in both.\n",
      "    \n",
      "        .. versionadded:: 0.17.0\n",
      "    \n",
      "    Examples\n",
      "    --------\n",
      "    \n",
      "    >>> A              >>> B\n",
      "        lkey value         rkey value\n",
      "    0   foo  1         0   foo  5\n",
      "    1   bar  2         1   bar  6\n",
      "    2   baz  3         2   qux  7\n",
      "    3   foo  4         3   bar  8\n",
      "    \n",
      "    >>> merge(A, B, left_on='lkey', right_on='rkey', how='outer')\n",
      "       lkey  value_x  rkey  value_y\n",
      "    0  foo   1        foo   5\n",
      "    1  foo   4        foo   5\n",
      "    2  bar   2        bar   6\n",
      "    3  bar   2        bar   8\n",
      "    4  baz   3        NaN   NaN\n",
      "    5  NaN   NaN      qux   7\n",
      "    \n",
      "    Returns\n",
      "    -------\n",
      "    merged : DataFrame\n",
      "        The output type will the be same as 'left', if it is a subclass\n",
      "        of DataFrame.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(pd.merge)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"FileA = pd.DataFrame({'ID':[1,2,1,3],'Name':['Bob','Sue','Joe','Fred']})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>ID</th>\n",
	" <th>Name</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>Bob</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>2</td>\n",
	" <td>Sue</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1</td>\n",
	" <td>Joe</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>3</td>\n",
	" <td>Fred</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" ID Name\n",
	"0 1 Bob\n",
	"1 2 Sue\n",
	"2 1 Joe\n",
	"3 3 Fred"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"FileA"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"FileB = pd.DataFrame({'ID':[1,2,3],\n",
	" 'Address':['here','there','everywhere']})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Address</th>\n",
	" <th>ID</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>here</td>\n",
	" <td>1</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>there</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>everywhere</td>\n",
	" <td>3</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" Address ID\n",
	"0 here 1\n",
	"1 there 2\n",
	"2 everywhere 3"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"FileB"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"merged = pd.merge(FileA, FileB, how='inner', on='ID')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>ID</th>\n",
	" <th>Name</th>\n",
	" <th>Address</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>Bob</td>\n",
	" <td>here</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>Joe</td>\n",
	" <td>here</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>2</td>\n",
	" <td>Sue</td>\n",
	" <td>there</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>3</td>\n",
	" <td>Fred</td>\n",
	" <td>everywhere</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" ID Name Address\n",
	"0 1 Bob here\n",
	"1 1 Joe here\n",
	"2 2 Sue there\n",
	"3 3 Fred everywhere"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"merged"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Help on function merge in module pandas.tools.merge:\n",
	"\n",
	"merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False)\n",
	" Merge DataFrame objects by performing a database-style join operation by\n",
	" columns or indexes.\n",
	" \n",
	" If joining columns on columns, the DataFrame indexes *will be\n",
	" ignored*. Otherwise if joining indexes on indexes or indexes on a column or\n",
	" columns, the index will be passed on.\n",
	" \n",
	" Parameters\n",
	" ----------\n",
	" left : DataFrame\n",
	" right : DataFrame\n",
	" how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n",
	" * left: use only keys from left frame (SQL: left outer join)\n",
	" * right: use only keys from right frame (SQL: right outer join)\n",
	" * outer: use union of keys from both frames (SQL: full outer join)\n",
	" * inner: use intersection of keys from both frames (SQL: inner join)\n",
	" on : label or list\n",
	" Field names to join on. Must be found in both DataFrames. If on is\n",
	" None and not merging on indexes, then it merges on the intersection of\n",
	" the columns by default.\n",
	" left_on : label or list, or array-like\n",
	" Field names to join on in left DataFrame. Can be a vector or list of\n",
	" vectors of the length of the DataFrame to use a particular vector as\n",
	" the join key instead of columns\n",
	" right_on : label or list, or array-like\n",
	" Field names to join on in right DataFrame or vector/list of vectors per\n",
	" left_on docs\n",
	" left_index : boolean, default False\n",
	" Use the index from the left DataFrame as the join key(s). If it is a\n",
	" MultiIndex, the number of keys in the other DataFrame (either the index\n",
	" or a number of columns) must match the number of levels\n",
	" right_index : boolean, default False\n",
	" Use the index from the right DataFrame as the join key. Same caveats as\n",
	" left_index\n",
	" sort : boolean, default False\n",
	" Sort the join keys lexicographically in the result DataFrame\n",
	" suffixes : 2-length sequence (tuple, list, ...)\n",
	" Suffix to apply to overlapping column names in the left and right\n",
	" side, respectively\n",
	" copy : boolean, default True\n",
	" If False, do not copy data unnecessarily\n",
	" indicator : boolean or string, default False\n",
	" If True, adds a column to output DataFrame called \"_merge\" with\n",
	" information on the source of each row.\n",
	" If string, column with information on source of each row will be added to\n",
	" output DataFrame, and column will be named value of string.\n",
	" Information column is Categorical-type and takes on a value of \"left_only\"\n",
	" for observations whose merge key only appears in 'left' DataFrame,\n",
	" \"right_only\" for observations whose merge key only appears in 'right'\n",
	" DataFrame, and \"both\" if the observation's merge key is found in both.\n",
	" \n",
	" .. versionadded:: 0.17.0\n",
	" \n",
	" Examples\n",
	" --------\n",
	" \n",
	" >>> A >>> B\n",
	" lkey value rkey value\n",
	" 0 foo 1 0 foo 5\n",
	" 1 bar 2 1 bar 6\n",
	" 2 baz 3 2 qux 7\n",
	" 3 foo 4 3 bar 8\n",
	" \n",
	" >>> merge(A, B, left_on='lkey', right_on='rkey', how='outer')\n",
	" lkey value_x rkey value_y\n",
	" 0 foo 1 foo 5\n",
	" 1 foo 4 foo 5\n",
	" 2 bar 2 bar 6\n",
	" 3 bar 2 bar 8\n",
	" 4 baz 3 NaN NaN\n",
	" 5 NaN NaN qux 7\n",
	" \n",
	" Returns\n",
	" -------\n",
	" merged : DataFrame\n",
	" The output type will the be same as 'left', if it is a subclass\n",
	" of DataFrame.\n",
	"\n"
	]
	}
	],
	"source": [
	"help(pd.merge)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}