Skip to content

Instantly share code, notes, and snippets.

@pybokeh
Created December 18, 2015 22:28
Show Gist options
  • Save pybokeh/d1022122068c9e15114f to your computer and use it in GitHub Desktop.
Save pybokeh/d1022122068c9e15114f to your computer and use it in GitHub Desktop.
pandas_merge
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"FileA = pd.DataFrame({'ID':[1,2,1,3],'Name':['Bob','Sue','Joe','Fred']})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Bob</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Sue</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>Joe</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>Fred</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Name\n",
"0 1 Bob\n",
"1 2 Sue\n",
"2 1 Joe\n",
"3 3 Fred"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FileA"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"FileB = pd.DataFrame({'ID':[1,2,3],\n",
" 'Address':['here','there','everywhere']})"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Address</th>\n",
" <th>ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>here</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>there</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>everywhere</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Address ID\n",
"0 here 1\n",
"1 there 2\n",
"2 everywhere 3"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FileB"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"merged = pd.merge(FileA, FileB, how='inner', on='ID')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Name</th>\n",
" <th>Address</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Bob</td>\n",
" <td>here</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>Joe</td>\n",
" <td>here</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>Sue</td>\n",
" <td>there</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>Fred</td>\n",
" <td>everywhere</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Name Address\n",
"0 1 Bob here\n",
"1 1 Joe here\n",
"2 2 Sue there\n",
"3 3 Fred everywhere"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on function merge in module pandas.tools.merge:\n",
"\n",
"merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False)\n",
" Merge DataFrame objects by performing a database-style join operation by\n",
" columns or indexes.\n",
" \n",
" If joining columns on columns, the DataFrame indexes *will be\n",
" ignored*. Otherwise if joining indexes on indexes or indexes on a column or\n",
" columns, the index will be passed on.\n",
" \n",
" Parameters\n",
" ----------\n",
" left : DataFrame\n",
" right : DataFrame\n",
" how : {'left', 'right', 'outer', 'inner'}, default 'inner'\n",
" * left: use only keys from left frame (SQL: left outer join)\n",
" * right: use only keys from right frame (SQL: right outer join)\n",
" * outer: use union of keys from both frames (SQL: full outer join)\n",
" * inner: use intersection of keys from both frames (SQL: inner join)\n",
" on : label or list\n",
" Field names to join on. Must be found in both DataFrames. If on is\n",
" None and not merging on indexes, then it merges on the intersection of\n",
" the columns by default.\n",
" left_on : label or list, or array-like\n",
" Field names to join on in left DataFrame. Can be a vector or list of\n",
" vectors of the length of the DataFrame to use a particular vector as\n",
" the join key instead of columns\n",
" right_on : label or list, or array-like\n",
" Field names to join on in right DataFrame or vector/list of vectors per\n",
" left_on docs\n",
" left_index : boolean, default False\n",
" Use the index from the left DataFrame as the join key(s). If it is a\n",
" MultiIndex, the number of keys in the other DataFrame (either the index\n",
" or a number of columns) must match the number of levels\n",
" right_index : boolean, default False\n",
" Use the index from the right DataFrame as the join key. Same caveats as\n",
" left_index\n",
" sort : boolean, default False\n",
" Sort the join keys lexicographically in the result DataFrame\n",
" suffixes : 2-length sequence (tuple, list, ...)\n",
" Suffix to apply to overlapping column names in the left and right\n",
" side, respectively\n",
" copy : boolean, default True\n",
" If False, do not copy data unnecessarily\n",
" indicator : boolean or string, default False\n",
" If True, adds a column to output DataFrame called \"_merge\" with\n",
" information on the source of each row.\n",
" If string, column with information on source of each row will be added to\n",
" output DataFrame, and column will be named value of string.\n",
" Information column is Categorical-type and takes on a value of \"left_only\"\n",
" for observations whose merge key only appears in 'left' DataFrame,\n",
" \"right_only\" for observations whose merge key only appears in 'right'\n",
" DataFrame, and \"both\" if the observation's merge key is found in both.\n",
" \n",
" .. versionadded:: 0.17.0\n",
" \n",
" Examples\n",
" --------\n",
" \n",
" >>> A >>> B\n",
" lkey value rkey value\n",
" 0 foo 1 0 foo 5\n",
" 1 bar 2 1 bar 6\n",
" 2 baz 3 2 qux 7\n",
" 3 foo 4 3 bar 8\n",
" \n",
" >>> merge(A, B, left_on='lkey', right_on='rkey', how='outer')\n",
" lkey value_x rkey value_y\n",
" 0 foo 1 foo 5\n",
" 1 foo 4 foo 5\n",
" 2 bar 2 bar 6\n",
" 3 bar 2 bar 8\n",
" 4 baz 3 NaN NaN\n",
" 5 NaN NaN qux 7\n",
" \n",
" Returns\n",
" -------\n",
" merged : DataFrame\n",
" The output type will the be same as 'left', if it is a subclass\n",
" of DataFrame.\n",
"\n"
]
}
],
"source": [
"help(pd.merge)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment