Skip to content

Instantly share code, notes, and snippets.

@nacyot
Created December 1, 2013 05:12
Show Gist options
  • Save nacyot/7728751 to your computer and use it in GitHub Desktop.
Save nacyot/7728751 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# 5.0 pandas \uc2dc\uc791\ud558\uae30"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"from pandas import Series, DataFrame\n",
"import pandas as pd"
],
"language": "python",
"outputs": [],
"prompt_number": 54
},
{
"cell_type": "markdown",
"source": [
"## 5.1 pandas \uc790\ub8cc \uad6c\uc870 \uc18c\uac1c\n",
"### 5.1.1 Series"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series([4, 7, -5, 3])"
],
"language": "python",
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 12,
"text": [
"0 4\n",
"1 7\n",
"2 -5\n",
"3 3\n",
"dtype: int64"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.values"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 13,
"text": [
"array([ 4, 7, -5, 3])"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 14,
"text": [
"Int64Index([0, 1, 2, 3], dtype=int64)"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2 = Series([4, 7, -5, 3], index=['d', 'c', 'b', 'a'])"
],
"language": "python",
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 16,
"text": [
"d 4\n",
"c 7\n",
"b -5\n",
"a 3\n",
"dtype: int64"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2.index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 17,
"text": [
"Index([u'd', u'c', u'b', u'a'], dtype=object)"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2['a']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 18,
"text": [
"3"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2['d'] = 6"
],
"language": "python",
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2[['c', 'a', 'd']]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 21,
"text": [
"c 7\n",
"a 3\n",
"d 6\n",
"dtype: int64"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 22,
"text": [
"d 6\n",
"c 7\n",
"b -5\n",
"a 3\n",
"dtype: int64"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2[obj2 > 0]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 24,
"text": [
"d 6\n",
"c 7\n",
"a 3\n",
"dtype: int64"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2 * 2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": [
"d 12\n",
"c 14\n",
"b -10\n",
"a 6\n",
"dtype: int64"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.exp(obj2)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 28,
"text": [
"d 403.428793\n",
"c 1096.633158\n",
"b 0.006738\n",
"a 20.085537\n",
"dtype: float64"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'b' in obj2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 32,
"text": [
"True"
]
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'e' in obj2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 33,
"text": [
"False"
]
}
],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
"obj3 = Series(sdata)\n",
"obj3"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 37,
"text": [
"Ohio 35000\n",
"Oregon 16000\n",
"Texas 71000\n",
"Utah 5000\n",
"dtype: int64"
]
}
],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"states = ['California', 'Ohio', 'Oregon', 'Texas']\n",
"obj4 = Series(sdata, index=states)\n",
"obj4"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 38,
"text": [
"California NaN\n",
"Ohio 35000\n",
"Oregon 16000\n",
"Texas 71000\n",
"dtype: float64"
]
}
],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.isnull(obj4)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 39,
"text": [
"California True\n",
"Ohio False\n",
"Oregon False\n",
"Texas False\n",
"dtype: bool"
]
}
],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.notnull(obj4)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 40,
"text": [
"California False\n",
"Ohio True\n",
"Oregon True\n",
"Texas True\n",
"dtype: bool"
]
}
],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj4.isnull()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 42,
"text": [
"California True\n",
"Ohio False\n",
"Oregon False\n",
"Texas False\n",
"dtype: bool"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj3"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 43,
"text": [
"Ohio 35000\n",
"Oregon 16000\n",
"Texas 71000\n",
"Utah 5000\n",
"dtype: int64"
]
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj4"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 44,
"text": [
"California NaN\n",
"Ohio 35000\n",
"Oregon 16000\n",
"Texas 71000\n",
"dtype: float64"
]
}
],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj3 + obj4"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 45,
"text": [
"California NaN\n",
"Ohio 70000\n",
"Oregon 32000\n",
"Texas 142000\n",
"Utah NaN\n",
"dtype: float64"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj4.name = 'population'"
],
"language": "python",
"outputs": [],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj4.index.name = 'state'"
],
"language": "python",
"outputs": [],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj4"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 48,
"text": [
"state\n",
"California NaN\n",
"Ohio 35000\n",
"Oregon 16000\n",
"Texas 71000\n",
"Name: population, dtype: float64"
]
}
],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.index = ['Box', 'Steve', 'Jeff', 'Ryan']"
],
"language": "python",
"outputs": [],
"prompt_number": 49
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 50,
"text": [
"Box 4\n",
"Steve 7\n",
"Jeff -5\n",
"Ryan 3\n",
"dtype: int64"
]
}
],
"prompt_number": 50
},
{
"cell_type": "markdown",
"source": [
"### 5.1.2 DataFrame"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n",
" 'year': [2000, 2001, 200, 2001, 2002],\n",
" 'pop': [1.5, 1.7, 3.6, 2.5, 2.9]}\n",
"frame = DataFrame(data)\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pop</th>\n",
" <th>state</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1.5</td>\n",
" <td> Ohio</td>\n",
" <td> 2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1.7</td>\n",
" <td> Ohio</td>\n",
" <td> 2001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 3.6</td>\n",
" <td> Ohio</td>\n",
" <td> 200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 2.5</td>\n",
" <td> Nevada</td>\n",
" <td> 2001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 2.9</td>\n",
" <td> Nevada</td>\n",
" <td> 2002</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 55,
"text": [
" pop state year\n",
"0 1.5 Ohio 2000\n",
"1 1.7 Ohio 2001\n",
"2 3.6 Ohio 200\n",
"3 2.5 Nevada 2001\n",
"4 2.9 Nevada 2002"
]
}
],
"prompt_number": 55
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"DataFrame(data, columns = ['year', 'state', 'pop'])"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 56,
"text": [
" year state pop\n",
"0 2000 Ohio 1.5\n",
"1 2001 Ohio 1.7\n",
"2 200 Ohio 3.6\n",
"3 2001 Nevada 2.5\n",
"4 2002 Nevada 2.9"
]
}
],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2 = DataFrame(data, columns = ['year', 'state', 'pop', 'debt'], \n",
" index = ['one', 'two', 'three', 'four', 'five'])\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" <th>debt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>four</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>five</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 58,
"text": [
" year state pop debt\n",
"one 2000 Ohio 1.5 NaN\n",
"two 2001 Ohio 1.7 NaN\n",
"three 200 Ohio 3.6 NaN\n",
"four 2001 Nevada 2.5 NaN\n",
"five 2002 Nevada 2.9 NaN"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2.columns"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 59,
"text": [
"Index([u'year', u'state', u'pop', u'debt'], dtype=object)"
]
}
],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2['state']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 60,
"text": [
"one Ohio\n",
"two Ohio\n",
"three Ohio\n",
"four Nevada\n",
"five Nevada\n",
"Name: state, dtype: object"
]
}
],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2.year"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 63,
"text": [
"one 2000\n",
"two 2001\n",
"three 200\n",
"four 2001\n",
"five 2002\n",
"Name: year, dtype: int64"
]
}
],
"prompt_number": 63
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2.ix['three']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 64,
"text": [
"year 200\n",
"state Ohio\n",
"pop 3.6\n",
"debt NaN\n",
"Name: three, dtype: object"
]
}
],
"prompt_number": 64
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2['debt'] = 16.5\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" <th>debt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" <td> 16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" <td> 16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" <td> 16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>four</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" <td> 16.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>five</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" <td> 16.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 67,
"text": [
" year state pop debt\n",
"one 2000 Ohio 1.5 16.5\n",
"two 2001 Ohio 1.7 16.5\n",
"three 200 Ohio 3.6 16.5\n",
"four 2001 Nevada 2.5 16.5\n",
"five 2002 Nevada 2.9 16.5"
]
}
],
"prompt_number": 67
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2['debt'] = np.arange(5.)\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" <th>debt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>four</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>five</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 69,
"text": [
" year state pop debt\n",
"one 2000 Ohio 1.5 0\n",
"two 2001 Ohio 1.7 1\n",
"three 200 Ohio 3.6 2\n",
"four 2001 Nevada 2.5 3\n",
"five 2002 Nevada 2.9 4"
]
}
],
"prompt_number": 69
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"val = Series([-1.2, -1.5, -1.7], index = ['two', 'four', 'five'])\n",
"frame2['dabt'] = val\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" <th>debt</th>\n",
" <th>dabt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" <td> 0</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" <td> 1</td>\n",
" <td>-1.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" <td> 2</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>four</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" <td> 3</td>\n",
" <td>-1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>five</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" <td> 4</td>\n",
" <td>-1.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 71,
"text": [
" year state pop debt dabt\n",
"one 2000 Ohio 1.5 0 NaN\n",
"two 2001 Ohio 1.7 1 -1.2\n",
"three 200 Ohio 3.6 2 NaN\n",
"four 2001 Nevada 2.5 3 -1.5\n",
"five 2002 Nevada 2.9 4 -1.7"
]
}
],
"prompt_number": 71
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2['eastern'] = frame2.state == 'Ohio'\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>year</th>\n",
" <th>state</th>\n",
" <th>pop</th>\n",
" <th>debt</th>\n",
" <th>dabt</th>\n",
" <th>eastern</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 2000</td>\n",
" <td> Ohio</td>\n",
" <td> 1.5</td>\n",
" <td> 0</td>\n",
" <td> NaN</td>\n",
" <td> True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>two</th>\n",
" <td> 2001</td>\n",
" <td> Ohio</td>\n",
" <td> 1.7</td>\n",
" <td> 1</td>\n",
" <td>-1.2</td>\n",
" <td> True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 200</td>\n",
" <td> Ohio</td>\n",
" <td> 3.6</td>\n",
" <td> 2</td>\n",
" <td> NaN</td>\n",
" <td> True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>four</th>\n",
" <td> 2001</td>\n",
" <td> Nevada</td>\n",
" <td> 2.5</td>\n",
" <td> 3</td>\n",
" <td>-1.5</td>\n",
" <td> False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>five</th>\n",
" <td> 2002</td>\n",
" <td> Nevada</td>\n",
" <td> 2.9</td>\n",
" <td> 4</td>\n",
" <td>-1.7</td>\n",
" <td> False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 75,
"text": [
" year state pop debt dabt eastern\n",
"one 2000 Ohio 1.5 0 NaN True\n",
"two 2001 Ohio 1.7 1 -1.2 True\n",
"three 200 Ohio 3.6 2 NaN True\n",
"four 2001 Nevada 2.5 3 -1.5 False\n",
"five 2002 Nevada 2.9 4 -1.7 False"
]
}
],
"prompt_number": 75
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"del frame2['eastern']\n",
"frame2.columns"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 76,
"text": [
"Index([u'year', u'state', u'pop', u'debt', u'dabt'], dtype=object)"
]
}
],
"prompt_number": 76
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pop = {'Nevada': {2001: 2.4, 2002: 2.9},\n",
" 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n",
"frame3 = DataFrame(pop)\n",
"frame3"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Nevada</th>\n",
" <th>Ohio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2000</th>\n",
" <td> NaN</td>\n",
" <td> 1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td> 2.4</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2002</th>\n",
" <td> 2.9</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 77,
"text": [
" Nevada Ohio\n",
"2000 NaN 1.5\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6"
]
}
],
"prompt_number": 77
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame3.T"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2000</th>\n",
" <th>2001</th>\n",
" <th>2002</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Nevada</th>\n",
" <td> NaN</td>\n",
" <td> 2.4</td>\n",
" <td> 2.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 1.5</td>\n",
" <td> 1.7</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 78,
"text": [
" 2000 2001 2002\n",
"Nevada NaN 2.4 2.9\n",
"Ohio 1.5 1.7 3.6"
]
}
],
"prompt_number": 78
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"DataFrame(pop, index = [2001, 2002, 2003])"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Nevada</th>\n",
" <th>Ohio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td> 2.4</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2002</th>\n",
" <td> 2.9</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2003</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 79,
"text": [
" Nevada Ohio\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6\n",
"2003 NaN NaN"
]
}
],
"prompt_number": 79
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pdata = {'Ohio': frame3['Ohio'][:-1],\n",
" 'Nevada': frame3['Nevada'][:2]}\n",
"DataFrame(pdata)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Nevada</th>\n",
" <th>Ohio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2000</th>\n",
" <td> NaN</td>\n",
" <td> 1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td> 2.4</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 81,
"text": [
" Nevada Ohio\n",
"2000 NaN 1.5\n",
"2001 2.4 1.7"
]
}
],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame3.index.name = 'year'\n",
"frame3.columns.name = 'state'\n",
"frame3"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>state</th>\n",
" <th>Nevada</th>\n",
" <th>Ohio</th>\n",
" </tr>\n",
" <tr>\n",
" <th>year</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2000</th>\n",
" <td> NaN</td>\n",
" <td> 1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td> 2.4</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2002</th>\n",
" <td> 2.9</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 82,
"text": [
"state Nevada Ohio\n",
"year \n",
"2000 NaN 1.5\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6"
]
}
],
"prompt_number": 82
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame3.values"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 83,
"text": [
"array([[ nan, 1.5],\n",
" [ 2.4, 1.7],\n",
" [ 2.9, 3.6]])"
]
}
],
"prompt_number": 83
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2.values"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 102,
"text": [
"array([[2000, 'Ohio', 1.5, 0.0, nan],\n",
" [2001, 'Ohio', 1.7, 1.0, -1.2],\n",
" [200, 'Ohio', 3.6, 2.0, nan],\n",
" [2001, 'Nevada', 2.5, 3.0, -1.5],\n",
" [2002, 'Nevada', 2.9, 4.0, -1.7]], dtype=object)"
]
}
],
"prompt_number": 102
},
{
"cell_type": "markdown",
"source": [
"### 5.1.3 \uc0c9\uc778 \uac1d\uccb4"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(range(3), index=['a', 'b', 'c'])\n",
"index = obj.index\n",
"index\n",
"index[1:]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 87,
"text": [
"Index([u'b', u'c'], dtype=object)"
]
}
],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"index[1] = 'd'"
],
"language": "python",
"outputs": [
{
"ename": "Exception",
"evalue": "<class 'pandas.core.index.Index'> object is immutable",
"output_type": "pyerr",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mException\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-89-676fdeb26a68>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mindex\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'd'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/hsin/.pyenv/versions/2.7.5/lib/python2.7/site-packages/pandas/core/index.pyc\u001b[0m in \u001b[0;36m__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 328\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 329\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 330\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m' object is immutable'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 331\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 332\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mException\u001b[0m: <class 'pandas.core.index.Index'> object is immutable"
]
}
],
"prompt_number": 89
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"index = pd.Index(np.arange(3))\n",
"obj2 = Series([1.5, -2.5, 0], index = index)\n",
"obj2.index is index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 94,
"text": [
"True"
]
}
],
"prompt_number": 94
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame3"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>state</th>\n",
" <th>Nevada</th>\n",
" <th>Ohio</th>\n",
" </tr>\n",
" <tr>\n",
" <th>year</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2000</th>\n",
" <td> NaN</td>\n",
" <td> 1.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2001</th>\n",
" <td> 2.4</td>\n",
" <td> 1.7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2002</th>\n",
" <td> 2.9</td>\n",
" <td> 3.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 95,
"text": [
"state Nevada Ohio\n",
"year \n",
"2000 NaN 1.5\n",
"2001 2.4 1.7\n",
"2002 2.9 3.6"
]
}
],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'Ohio' in frame3.columns"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 96,
"text": [
"True"
]
}
],
"prompt_number": 96
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"2003 in frame3.index"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 98,
"text": [
"False"
]
}
],
"prompt_number": 98
},
{
"cell_type": "markdown",
"source": [
"## 5.2 \ud575\uc2ec\uae30\ub2a5\n",
"### 5.2.1 \uc7ac\uc0c9\uc778"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series([4.2, 7.2, -5.3, 3.6], index = ['d', 'b', 'a', 'c'])\n",
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 103,
"text": [
"d 4.2\n",
"b 7.2\n",
"a -5.3\n",
"c 3.6\n",
"dtype: float64"
]
}
],
"prompt_number": 103
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])\n",
"obj2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 104,
"text": [
"a -5.3\n",
"b 7.2\n",
"c 3.6\n",
"d 4.2\n",
"e NaN\n",
"dtype: float64"
]
}
],
"prompt_number": 104
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 105,
"text": [
"a -5.3\n",
"b 7.2\n",
"c 3.6\n",
"d 4.2\n",
"e 0.0\n",
"dtype: float64"
]
}
],
"prompt_number": 105
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])\n",
"obj3.reindex(range(6), method = 'ffill')"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 107,
"text": [
"0 blue\n",
"1 blue\n",
"2 purple\n",
"3 purple\n",
"4 yellow\n",
"5 yellow\n",
"dtype: object"
]
}
],
"prompt_number": 107
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame(np.arange(9).reshape((3,3)), index=['a', 'c', 'd'],\n",
" columns=['Ohio', 'Taxes', 'California'] )\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ohio</th>\n",
" <th>Taxes</th>\n",
" <th>California</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 109,
"text": [
" Ohio Taxes California\n",
"a 0 1 2\n",
"c 3 4 5\n",
"d 6 7 8"
]
}
],
"prompt_number": 109
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame2 = frame.reindex(['a', 'b', 'c', 'd'])\n",
"frame2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ohio</th>\n",
" <th>Taxes</th>\n",
" <th>California</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 110,
"text": [
" Ohio Taxes California\n",
"a 0 1 2\n",
"b NaN NaN NaN\n",
"c 3 4 5\n",
"d 6 7 8"
]
}
],
"prompt_number": 110
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"states = ['Taxes', 'Utah', 'California']\n",
"frame.reindex(columns = states)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Taxes</th>\n",
" <th>Utah</th>\n",
" <th>California</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 7</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 111,
"text": [
" Taxes Utah California\n",
"a 1 NaN 2\n",
"c 4 NaN 5\n",
"d 7 NaN 8"
]
}
],
"prompt_number": 111
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.reindex(index=['a', 'b', 'c', 'd'], method = 'ffill', columns = states)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Taxes</th>\n",
" <th>Utah</th>\n",
" <th>California</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 7</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 112,
"text": [
" Taxes Utah California\n",
"a 1 NaN 2\n",
"b 1 NaN 2\n",
"c 4 NaN 5\n",
"d 7 NaN 8"
]
}
],
"prompt_number": 112
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.ix[['a', 'b', 'c', 'd'], states]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Taxes</th>\n",
" <th>Utah</th>\n",
" <th>California</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 7</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 118,
"text": [
" Taxes Utah California\n",
"a 1 NaN 2\n",
"b NaN NaN NaN\n",
"c 4 NaN 5\n",
"d 7 NaN 8"
]
}
],
"prompt_number": 118
},
{
"cell_type": "markdown",
"source": [
"### 5.2.2 \ud558\ub098\uc758 \ub85c\uc6b0 \ub610\ub294 \uceec\ub7fc \uc0ad\uc81c\ud558\uae30"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])\n",
"new_obj = obj.drop('c')\n",
"new_obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 121,
"text": [
"a 0\n",
"b 1\n",
"d 3\n",
"e 4\n",
"dtype: float64"
]
}
],
"prompt_number": 121
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.drop(['d', 'c'])"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 122,
"text": [
"a 0\n",
"b 1\n",
"e 4\n",
"dtype: float64"
]
}
],
"prompt_number": 122
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = DataFrame(np.arange(16).reshape((4,4)), \n",
" index=['Ohio', 'Colorado', 'Utah', 'New York'],\n",
" columns=['one', 'two', 'three', 'four'])\n",
"data.drop(['Colorado', 'Ohio'])"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 124,
"text": [
" one two three four\n",
"Utah 8 9 10 11\n",
"New York 12 13 14 15"
]
}
],
"prompt_number": 124
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.drop('two', axis=1)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 4</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 14</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 125,
"text": [
" one three four\n",
"Ohio 0 2 3\n",
"Colorado 4 6 7\n",
"Utah 8 10 11\n",
"New York 12 14 15"
]
}
],
"prompt_number": 125
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.drop(['two', 'four'], axis=1)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>three</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 4</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 136,
"text": [
" one three\n",
"Ohio 0 2\n",
"Colorado 4 6\n",
"Utah 8 10\n",
"New York 12 14"
]
}
],
"prompt_number": 136
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"### 5.2.3 \uc0c9\uc778\ud558\uae30, \uc120\ud0dd\ud558\uae30, \uac70\ub974\uae30"
],
"language": "python",
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(np.arange(4.), index = ['a', 'b', 'c', 'd'])\n",
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 128,
"text": [
"a 0\n",
"b 1\n",
"c 2\n",
"d 3\n",
"dtype: float64"
]
}
],
"prompt_number": 128
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj['b']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 129,
"text": [
"1.0"
]
}
],
"prompt_number": 129
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[1]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 130,
"text": [
"1.0"
]
}
],
"prompt_number": 130
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[2:4]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 131,
"text": [
"c 2\n",
"d 3\n",
"dtype: float64"
]
}
],
"prompt_number": 131
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[['b', 'a', 'd']]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 132,
"text": [
"b 1\n",
"a 0\n",
"d 3\n",
"dtype: float64"
]
}
],
"prompt_number": 132
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[[1,3]]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 133,
"text": [
"b 1\n",
"d 3\n",
"dtype: float64"
]
}
],
"prompt_number": 133
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[obj < 2]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 134,
"text": [
"a 0\n",
"b 1\n",
"dtype: float64"
]
}
],
"prompt_number": 134
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj['b':'c']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 135,
"text": [
"b 1\n",
"c 2\n",
"dtype: float64"
]
}
],
"prompt_number": 135
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj['b':'c'] = 5\n",
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 138,
"text": [
"a 0\n",
"b 5\n",
"c 5\n",
"d 3\n",
"dtype: float64"
]
}
],
"prompt_number": 138
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = DataFrame(np.arange(16).reshape((4,4)), \n",
" index=['Ohio', 'Colorado', 'Utah', 'New York'],\n",
" columns=['one', 'two', 'three', 'four'])\n",
"data"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 139,
"text": [
" one two three four\n",
"Ohio 0 1 2 3\n",
"Colorado 4 5 6 7\n",
"Utah 8 9 10 11\n",
"New York 12 13 14 15"
]
}
],
"prompt_number": 139
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data['two']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 140,
"text": [
"Ohio 1\n",
"Colorado 5\n",
"Utah 9\n",
"New York 13\n",
"Name: two, dtype: int64"
]
}
],
"prompt_number": 140
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[['three', 'one']]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>three</th>\n",
" <th>one</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 2</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 6</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 10</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 14</td>\n",
" <td> 12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 141,
"text": [
" three one\n",
"Ohio 2 0\n",
"Colorado 6 4\n",
"Utah 10 8\n",
"New York 14 12"
]
}
],
"prompt_number": 141
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[:2]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 142,
"text": [
" one two three four\n",
"Ohio 0 1 2 3\n",
"Colorado 4 5 6 7"
]
}
],
"prompt_number": 142
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[data['three'] > 5]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 143,
"text": [
" one two three four\n",
"Colorado 4 5 6 7\n",
"Utah 8 9 10 11\n",
"New York 12 13 14 15"
]
}
],
"prompt_number": 143
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data < 5"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> True</td>\n",
" <td> True</td>\n",
" <td> True</td>\n",
" <td> True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> True</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 144,
"text": [
" one two three four\n",
"Ohio True True True True\n",
"Colorado True False False False\n",
"Utah False False False False\n",
"New York False False False False"
]
}
],
"prompt_number": 144
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[data < 5] = 0\n",
"data"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 0</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" <td> 15</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 146,
"text": [
" one two three four\n",
"Ohio 0 0 0 0\n",
"Colorado 0 5 6 7\n",
"Utah 8 9 10 11\n",
"New York 12 13 14 15"
]
}
],
"prompt_number": 146
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.ix['Colorado', ['two', 'three']]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 147,
"text": [
"two 5\n",
"three 6\n",
"Name: Colorado, dtype: int64"
]
}
],
"prompt_number": 147
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.ix[['Colorado', 'Utah'], [3, 0, 1]]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>four</th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 7</td>\n",
" <td> 0</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 11</td>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 149,
"text": [
" four one two\n",
"Colorado 7 0 5\n",
"Utah 11 8 9"
]
}
],
"prompt_number": 149
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.ix[2]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 150,
"text": [
"one 8\n",
"two 9\n",
"three 10\n",
"four 11\n",
"Name: Utah, dtype: int64"
]
}
],
"prompt_number": 150
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.ix[:'Utah', 'two']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 151,
"text": [
"Ohio 0\n",
"Colorado 5\n",
"Utah 9\n",
"Name: two, dtype: int64"
]
}
],
"prompt_number": 151
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.ix[data.three > 5, :3]"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 0</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>New York</th>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 152,
"text": [
" one two three\n",
"Colorado 0 5 6\n",
"Utah 8 9 10\n",
"New York 12 13 14"
]
}
],
"prompt_number": 152
},
{
"cell_type": "markdown",
"source": [
"5.2.4 \uc0b0\uc220 \uc5f0\uc0b0\uacfc \ub370\uc774\ud130 \uc815\ub82c"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s1 = Series([7.3, -2.5, 3.4, 1.5], index=['a', 'b', 'c', 'd'])\n",
"s1"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 153,
"text": [
"a 7.3\n",
"b -2.5\n",
"c 3.4\n",
"d 1.5\n",
"dtype: float64"
]
}
],
"prompt_number": 153
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])\n",
"s2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 154,
"text": [
"a -2.1\n",
"c 3.6\n",
"e -1.5\n",
"f 4.0\n",
"g 3.1\n",
"dtype: float64"
]
}
],
"prompt_number": 154
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"s1 + s2"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 155,
"text": [
"a 5.2\n",
"b NaN\n",
"c 7.0\n",
"d NaN\n",
"e NaN\n",
"f NaN\n",
"g NaN\n",
"dtype: float64"
]
}
],
"prompt_number": 155
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1 = DataFrame(np.arange(9.).reshape((3,3)), columns=list('bcd'),\n",
" index = ['Ohio', 'Texas', 'Colorado'])\n",
"df1"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 156,
"text": [
" b c d\n",
"Ohio 0 1 2\n",
"Texas 3 4 5\n",
"Colorado 6 7 8"
]
}
],
"prompt_number": 156
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df2 = DataFrame(np.arange(12.).reshape((4,3)), columns=list('bde'),\n",
" index = ['Utah', 'Ohio', 'Texas', 'Oregon'])\n",
"df2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 158,
"text": [
" b d e\n",
"Utah 0 1 2\n",
"Ohio 3 4 5\n",
"Texas 6 7 8\n",
"Oregon 9 10 11"
]
}
],
"prompt_number": 158
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1 + df2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Colorado</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 9</td>\n",
" <td>NaN</td>\n",
" <td> 12</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 159,
"text": [
" b c d e\n",
"Colorado NaN NaN NaN NaN\n",
"Ohio 3 NaN 6 NaN\n",
"Oregon NaN NaN NaN NaN\n",
"Texas 9 NaN 12 NaN\n",
"Utah NaN NaN NaN NaN"
]
}
],
"prompt_number": 159
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))\n",
"df1"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 161,
"text": [
" a b c d\n",
"0 0 1 2 3\n",
"1 4 5 6 7\n",
"2 8 9 10 11"
]
}
],
"prompt_number": 161
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))\n",
"df2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 15</td>\n",
" <td> 16</td>\n",
" <td> 17</td>\n",
" <td> 18</td>\n",
" <td> 19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 163,
"text": [
" a b c d e\n",
"0 0 1 2 3 4\n",
"1 5 6 7 8 9\n",
"2 10 11 12 13 14\n",
"3 15 16 17 18 19"
]
}
],
"prompt_number": 163
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1 + df2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 9</td>\n",
" <td> 11</td>\n",
" <td> 13</td>\n",
" <td> 15</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 18</td>\n",
" <td> 20</td>\n",
" <td> 22</td>\n",
" <td> 24</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 164,
"text": [
" a b c d e\n",
"0 0 2 4 6 NaN\n",
"1 9 11 13 15 NaN\n",
"2 18 20 22 24 NaN\n",
"3 NaN NaN NaN NaN NaN"
]
}
],
"prompt_number": 164
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1.add(df2, fill_value=0)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 6</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 9</td>\n",
" <td> 11</td>\n",
" <td> 13</td>\n",
" <td> 15</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 18</td>\n",
" <td> 20</td>\n",
" <td> 22</td>\n",
" <td> 24</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 15</td>\n",
" <td> 16</td>\n",
" <td> 17</td>\n",
" <td> 18</td>\n",
" <td> 19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 165,
"text": [
" a b c d e\n",
"0 0 2 4 6 4\n",
"1 9 11 13 15 9\n",
"2 18 20 22 24 14\n",
"3 15 16 17 18 19"
]
}
],
"prompt_number": 165
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df1.reindex(columns=df2.columns, fill_value=0)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 8</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 166,
"text": [
" a b c d e\n",
"0 0 1 2 3 0\n",
"1 4 5 6 7 0\n",
"2 8 9 10 11 0"
]
}
],
"prompt_number": 166
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"arr = np.arange(12.).reshape((3,4))\n",
"arr"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 168,
"text": [
"array([[ 0., 1., 2., 3.],\n",
" [ 4., 5., 6., 7.],\n",
" [ 8., 9., 10., 11.]])"
]
}
],
"prompt_number": 168
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"arr[0]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 169,
"text": [
"array([ 0., 1., 2., 3.])"
]
}
],
"prompt_number": 169
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"arr - arr[0]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 170,
"text": [
"array([[ 0., 0., 0., 0.],\n",
" [ 4., 4., 4., 4.],\n",
" [ 8., 8., 8., 8.]])"
]
}
],
"prompt_number": 170
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame(np.arange(12.).reshape((4,3)), columns=list('bde'),\n",
" index = ['Utah', 'Ohio', 'Texas', 'Oregon'])\n",
"series = frame.ix[0]\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 175,
"text": [
" b d e\n",
"Utah 0 1 2\n",
"Ohio 3 4 5\n",
"Texas 6 7 8\n",
"Oregon 9 10 11"
]
}
],
"prompt_number": 175
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"series"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 176,
"text": [
"b 0\n",
"d 1\n",
"e 2\n",
"Name: Utah, dtype: float64"
]
}
],
"prompt_number": 176
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame - series"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 6</td>\n",
" <td> 6</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 9</td>\n",
" <td> 9</td>\n",
" <td> 9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 177,
"text": [
" b d e\n",
"Utah 0 0 0\n",
"Ohio 3 3 3\n",
"Texas 6 6 6\n",
"Oregon 9 9 9"
]
}
],
"prompt_number": 177
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"series2 = Series(range(3), index=['b', 'e', 'f'])\n",
"frame + series2"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" <th>f</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 0</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" <td> 9</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 9</td>\n",
" <td>NaN</td>\n",
" <td> 12</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 179,
"text": [
" b d e f\n",
"Utah 0 NaN 3 NaN\n",
"Ohio 3 NaN 6 NaN\n",
"Texas 6 NaN 9 NaN\n",
"Oregon 9 NaN 12 NaN"
]
}
],
"prompt_number": 179
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"series3 = frame['d']\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 180,
"text": [
" b d e\n",
"Utah 0 1 2\n",
"Ohio 3 4 5\n",
"Texas 6 7 8\n",
"Oregon 9 10 11"
]
}
],
"prompt_number": 180
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"series3"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 181,
"text": [
"Utah 1\n",
"Ohio 4\n",
"Texas 7\n",
"Oregon 10\n",
"Name: d, dtype: float64"
]
}
],
"prompt_number": 181
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.sub(series3, axis=0)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td>-1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td>-1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td>-1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td>-1</td>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 182,
"text": [
" b d e\n",
"Utah -1 0 1\n",
"Ohio -1 0 1\n",
"Texas -1 0 1\n",
"Oregon -1 0 1"
]
}
],
"prompt_number": 182
},
{
"cell_type": "markdown",
"source": [
"### 5.2.5 \ud568\uc218 \uc801\uc6a9\uacfc \ub9f5\ud551"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame(np.random.randn(12).reshape((4,3)), columns=list('bde'),\n",
" index = ['Utah', 'Ohio', 'Texas', 'Oregon'])\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 1.399122</td>\n",
" <td> 0.461512</td>\n",
" <td>-0.747401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td>-1.501139</td>\n",
" <td> 0.841735</td>\n",
" <td>-0.702116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td>-0.915986</td>\n",
" <td> 0.532630</td>\n",
" <td>-0.705683</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td>-0.444900</td>\n",
" <td> 0.423755</td>\n",
" <td>-0.755523</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 184,
"text": [
" b d e\n",
"Utah 1.399122 0.461512 -0.747401\n",
"Ohio -1.501139 0.841735 -0.702116\n",
"Texas -0.915986 0.532630 -0.705683\n",
"Oregon -0.444900 0.423755 -0.755523"
]
}
],
"prompt_number": 184
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"np.abs(frame)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 1.399122</td>\n",
" <td> 0.461512</td>\n",
" <td> 0.747401</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> 1.501139</td>\n",
" <td> 0.841735</td>\n",
" <td> 0.702116</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> 0.915986</td>\n",
" <td> 0.532630</td>\n",
" <td> 0.705683</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> 0.444900</td>\n",
" <td> 0.423755</td>\n",
" <td> 0.755523</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 185,
"text": [
" b d e\n",
"Utah 1.399122 0.461512 0.747401\n",
"Ohio 1.501139 0.841735 0.702116\n",
"Texas 0.915986 0.532630 0.705683\n",
"Oregon 0.444900 0.423755 0.755523"
]
}
],
"prompt_number": 185
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"f = lambda x: x.max() - x.min()"
],
"language": "python",
"outputs": [],
"prompt_number": 186
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.apply(f)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 187,
"text": [
"b 2.900261\n",
"d 0.417980\n",
"e 0.053407\n",
"dtype: float64"
]
}
],
"prompt_number": 187
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.apply(f, axis=1)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 188,
"text": [
"Utah 2.146524\n",
"Ohio 2.342874\n",
"Texas 1.448616\n",
"Oregon 1.179279\n",
"dtype: float64"
]
}
],
"prompt_number": 188
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def f(x):\n",
" return Series([x.min(), x.max()], index = ['min', 'max'])\n",
"\n",
"frame.apply(f)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>-1.501139</td>\n",
" <td> 0.423755</td>\n",
" <td>-0.755523</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 1.399122</td>\n",
" <td> 0.841735</td>\n",
" <td>-0.702116</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 194,
"text": [
" b d e\n",
"min -1.501139 0.423755 -0.755523\n",
"max 1.399122 0.841735 -0.702116"
]
}
],
"prompt_number": 194
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"format = lambda x: '%.2f' % x\n",
"frame.applymap(format)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" <th>d</th>\n",
" <th>e</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Utah</th>\n",
" <td> 1.40</td>\n",
" <td> 0.46</td>\n",
" <td> -0.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Ohio</th>\n",
" <td> -1.50</td>\n",
" <td> 0.84</td>\n",
" <td> -0.70</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Texas</th>\n",
" <td> -0.92</td>\n",
" <td> 0.53</td>\n",
" <td> -0.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Oregon</th>\n",
" <td> -0.44</td>\n",
" <td> 0.42</td>\n",
" <td> -0.76</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 196,
"text": [
" b d e\n",
"Utah 1.40 0.46 -0.75\n",
"Ohio -1.50 0.84 -0.70\n",
"Texas -0.92 0.53 -0.71\n",
"Oregon -0.44 0.42 -0.76"
]
}
],
"prompt_number": 196
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame['e'].map(format)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 197,
"text": [
"Utah -0.75\n",
"Ohio -0.70\n",
"Texas -0.71\n",
"Oregon -0.76\n",
"Name: e, dtype: object"
]
}
],
"prompt_number": 197
},
{
"cell_type": "markdown",
"source": [
"### 5.2.6 \uc815\ub82c\uacfc \uc21c\uc704"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(range(4), index=['d', 'a', 'b', 'c'])\n",
"obj.sort_index()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 198,
"text": [
"a 1\n",
"b 2\n",
"c 3\n",
"d 0\n",
"dtype: int64"
]
}
],
"prompt_number": 198
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame(np.arange(8).reshape((2,4)), index=['three', 'one'],\n",
" columns = ['d', 'a', 'b', 'c'])\n",
"frame.sort_index()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>d</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 0</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 199,
"text": [
" d a b c\n",
"one 4 5 6 7\n",
"three 0 1 2 3"
]
}
],
"prompt_number": 199
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.sort_index(axis=1)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 200,
"text": [
" a b c d\n",
"three 1 2 3 0\n",
"one 5 6 7 4"
]
}
],
"prompt_number": 200
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.sort_index(axis=1, ascending=False)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>d</th>\n",
" <th>c</th>\n",
" <th>b</th>\n",
" <th>a</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>three</th>\n",
" <td> 0</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>one</th>\n",
" <td> 4</td>\n",
" <td> 7</td>\n",
" <td> 6</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 201,
"text": [
" d c b a\n",
"three 0 3 2 1\n",
"one 4 7 6 5"
]
}
],
"prompt_number": 201
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series([4, 7, -3, 2])\n",
"obj.order()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 202,
"text": [
"2 -3\n",
"3 2\n",
"0 4\n",
"1 7\n",
"dtype: int64"
]
}
],
"prompt_number": 202
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series([4, np.nan, 7, np.nan, -3, 2])\n",
"obj.order()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 203,
"text": [
"4 -3\n",
"5 2\n",
"0 4\n",
"2 7\n",
"1 NaN\n",
"3 NaN\n",
"dtype: float64"
]
}
],
"prompt_number": 203
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td>-3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 204,
"text": [
" a b\n",
"0 0 4\n",
"1 1 7\n",
"2 0 -3\n",
"3 1 2"
]
}
],
"prompt_number": 204
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.sort_index(by='b')"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td>-3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 205,
"text": [
" a b\n",
"2 0 -3\n",
"3 1 2\n",
"0 0 4\n",
"1 1 7"
]
}
],
"prompt_number": 205
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.sort_index(by=['a', 'b'])"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td>-3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 206,
"text": [
" a b\n",
"2 0 -3\n",
"0 0 4\n",
"3 1 2\n",
"1 1 7"
]
}
],
"prompt_number": 206
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series([7, -5, 4, 2, 0, 4])\n",
"obj.rank()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 207,
"text": [
"0 6.0\n",
"1 1.0\n",
"2 4.5\n",
"3 3.0\n",
"4 2.0\n",
"5 4.5\n",
"dtype: float64"
]
}
],
"prompt_number": 207
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.rank(method='first')"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 208,
"text": [
"0 6\n",
"1 1\n",
"2 4\n",
"3 3\n",
"4 2\n",
"5 5\n",
"dtype: float64"
]
}
],
"prompt_number": 208
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.rank(ascending=False, method='max')"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 209,
"text": [
"0 1\n",
"1 6\n",
"2 3\n",
"3 4\n",
"4 5\n",
"5 3\n",
"dtype: float64"
]
}
],
"prompt_number": 209
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1], 'c':[-2, 5, 8, -2.5]})\n",
"frame"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0</td>\n",
" <td> 4</td>\n",
" <td>-2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 7</td>\n",
" <td> 5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 0</td>\n",
" <td>-3</td>\n",
" <td> 8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td>-2.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 210,
"text": [
" a b c\n",
"0 0 4 -2.0\n",
"1 1 7 5.0\n",
"2 0 -3 8.0\n",
"3 1 2 -2.5"
]
}
],
"prompt_number": 210
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame.rank(axis=1)"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 211,
"text": [
" a b c\n",
"0 2 3 1\n",
"1 1 3 2\n",
"2 2 1 3\n",
"3 2 3 1"
]
}
],
"prompt_number": 211
},
{
"cell_type": "markdown",
"source": [
"### 5.2.7 \uc911\ubcf5 \uc0c9\uc778"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(range(5), index = ['a', 'a', 'b', 'b', 'c'])\n",
"obj"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 212,
"text": [
"a 0\n",
"a 1\n",
"b 2\n",
"b 3\n",
"c 4\n",
"dtype: int64"
]
}
],
"prompt_number": 212
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.index.is_unique"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 213,
"text": [
"False"
]
}
],
"prompt_number": 213
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj['a']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 214,
"text": [
"a 0\n",
"a 1\n",
"dtype: int64"
]
}
],
"prompt_number": 214
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj['c']"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 215,
"text": [
"4"
]
}
],
"prompt_number": 215
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = DataFrame(np.random.randn(4,3), index=['a', 'a', 'b', 'b'])\n",
"df"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>-0.942858</td>\n",
" <td> 1.531089</td>\n",
" <td>-1.354422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 0.551932</td>\n",
" <td>-0.507319</td>\n",
" <td>-0.139572</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 1.615429</td>\n",
" <td> 0.827606</td>\n",
" <td> 1.837605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 1.789760</td>\n",
" <td>-0.918501</td>\n",
" <td>-0.391268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 217,
"text": [
" 0 1 2\n",
"a -0.942858 1.531089 -1.354422\n",
"a 0.551932 -0.507319 -0.139572\n",
"b 1.615429 0.827606 1.837605\n",
"b 1.789760 -0.918501 -0.391268"
]
}
],
"prompt_number": 217
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.ix['b']"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 1.615429</td>\n",
" <td> 0.827606</td>\n",
" <td> 1.837605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 1.789760</td>\n",
" <td>-0.918501</td>\n",
" <td>-0.391268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 218,
"text": [
" 0 1 2\n",
"b 1.615429 0.827606 1.837605\n",
"b 1.789760 -0.918501 -0.391268"
]
}
],
"prompt_number": 218
},
{
"cell_type": "markdown",
"source": [
"## 5.3 \uae30\uc220\ud1b5\uacc4 \uacc4\uc0b0\uacfc \uc694\uc57d"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = DataFrame([[1.4, np.nan], [7.1, 4.5], [np.nan, np.nan], [0.75, -1.3]],\n",
" index = ['a', 'b', 'c', 'd'],\n",
" columns=['one', 'two'])\n",
"df"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 1.40</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 7.10</td>\n",
" <td> 4.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 0.75</td>\n",
" <td>-1.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 219,
"text": [
" one two\n",
"a 1.40 NaN\n",
"b 7.10 4.5\n",
"c NaN NaN\n",
"d 0.75 -1.3"
]
}
],
"prompt_number": 219
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.sum()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 220,
"text": [
"one 9.25\n",
"two 3.20\n",
"dtype: float64"
]
}
],
"prompt_number": 220
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.sum(axis=1)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 221,
"text": [
"a 1.40\n",
"b 11.60\n",
"c NaN\n",
"d -0.55\n",
"dtype: float64"
]
}
],
"prompt_number": 221
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.mean(axis=1, skipna=False)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 222,
"text": [
"a NaN\n",
"b 5.800\n",
"c NaN\n",
"d -0.275\n",
"dtype: float64"
]
}
],
"prompt_number": 222
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.idxmax()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 223,
"text": [
"one b\n",
"two b\n",
"dtype: object"
]
}
],
"prompt_number": 223
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.cumsum()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td> 1.40</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 8.50</td>\n",
" <td> 4.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 9.25</td>\n",
" <td> 3.2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 224,
"text": [
" one two\n",
"a 1.40 NaN\n",
"b 8.50 4.5\n",
"c NaN NaN\n",
"d 9.25 3.2"
]
}
],
"prompt_number": 224
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.describe()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td> 3.000000</td>\n",
" <td> 2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 3.083333</td>\n",
" <td> 1.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 3.493685</td>\n",
" <td> 4.101219</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 0.750000</td>\n",
" <td>-1.300000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 1.075000</td>\n",
" <td> 0.150000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 1.400000</td>\n",
" <td> 1.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 4.250000</td>\n",
" <td> 3.050000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 7.100000</td>\n",
" <td> 4.500000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 225,
"text": [
" one two\n",
"count 3.000000 2.000000\n",
"mean 3.083333 1.600000\n",
"std 3.493685 4.101219\n",
"min 0.750000 -1.300000\n",
"25% 1.075000 0.150000\n",
"50% 1.400000 1.600000\n",
"75% 4.250000 3.050000\n",
"max 7.100000 4.500000"
]
}
],
"prompt_number": 225
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(['a', 'a', 'b', 'c'] * 4)\n",
"obj.describe()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 231,
"text": [
"count 16\n",
"unique 3\n",
"top a\n",
"freq 8\n",
"dtype: object"
]
}
],
"prompt_number": 231
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas.io.data as web\n",
"all_data = {}\n",
"for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:\n",
" all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')\n",
"\n",
"price = DataFrame({tic: data['Adj Close']\n",
" for tic, data in all_data.iteritems()})\n",
"\n",
"volume = DataFrame({tic: data['Volume']\n",
" for tic, data in all_data.iteritems()})"
],
"language": "python",
"outputs": [],
"prompt_number": 249
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"price.tail()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AAPL</th>\n",
" <th>GOOG</th>\n",
" <th>IBM</th>\n",
" <th>MSFT</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2009-12-24</th>\n",
" <td> 202.12</td>\n",
" <td> 618.48</td>\n",
" <td> 121.54</td>\n",
" <td> 27.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-28</th>\n",
" <td> 204.61</td>\n",
" <td> 622.87</td>\n",
" <td> 123.16</td>\n",
" <td> 28.08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-29</th>\n",
" <td> 202.18</td>\n",
" <td> 619.40</td>\n",
" <td> 122.74</td>\n",
" <td> 28.27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-30</th>\n",
" <td> 204.64</td>\n",
" <td> 622.73</td>\n",
" <td> 123.41</td>\n",
" <td> 27.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-31</th>\n",
" <td> 203.76</td>\n",
" <td> 619.98</td>\n",
" <td> 121.85</td>\n",
" <td> 27.45</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 236,
"text": [
" AAPL GOOG IBM MSFT\n",
"Date \n",
"2009-12-24 202.12 618.48 121.54 27.92\n",
"2009-12-28 204.61 622.87 123.16 28.08\n",
"2009-12-29 202.18 619.40 122.74 28.27\n",
"2009-12-30 204.64 622.73 123.41 27.89\n",
"2009-12-31 203.76 619.98 121.85 27.45"
]
}
],
"prompt_number": 236
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(price)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 237,
"text": [
"2515"
]
}
],
"prompt_number": 237
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns = price.pct_change()"
],
"language": "python",
"outputs": [],
"prompt_number": 239
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.tail()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AAPL</th>\n",
" <th>GOOG</th>\n",
" <th>IBM</th>\n",
" <th>MSFT</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Date</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2009-12-24</th>\n",
" <td> 0.034338</td>\n",
" <td> 0.011117</td>\n",
" <td> 0.004380</td>\n",
" <td> 0.002513</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-28</th>\n",
" <td> 0.012319</td>\n",
" <td> 0.007098</td>\n",
" <td> 0.013329</td>\n",
" <td> 0.005731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-29</th>\n",
" <td>-0.011876</td>\n",
" <td>-0.005571</td>\n",
" <td>-0.003410</td>\n",
" <td> 0.006766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-30</th>\n",
" <td> 0.012167</td>\n",
" <td> 0.005376</td>\n",
" <td> 0.005459</td>\n",
" <td>-0.013442</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2009-12-31</th>\n",
" <td>-0.004300</td>\n",
" <td>-0.004416</td>\n",
" <td>-0.012641</td>\n",
" <td>-0.015776</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 240,
"text": [
" AAPL GOOG IBM MSFT\n",
"Date \n",
"2009-12-24 0.034338 0.011117 0.004380 0.002513\n",
"2009-12-28 0.012319 0.007098 0.013329 0.005731\n",
"2009-12-29 -0.011876 -0.005571 -0.003410 0.006766\n",
"2009-12-30 0.012167 0.005376 0.005459 -0.013442\n",
"2009-12-31 -0.004300 -0.004416 -0.012641 -0.015776"
]
}
],
"prompt_number": 240
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.MSFT.corr(returns.IBM)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 241,
"text": [
"0.49594890956910831"
]
}
],
"prompt_number": 241
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.MSFT.cov(returns.IBM)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 242,
"text": [
"0.00021595142140529413"
]
}
],
"prompt_number": 242
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.corr()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AAPL</th>\n",
" <th>GOOG</th>\n",
" <th>IBM</th>\n",
" <th>MSFT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <td> 1.000000</td>\n",
" <td> 0.470756</td>\n",
" <td> 0.410074</td>\n",
" <td> 0.424234</td>\n",
" </tr>\n",
" <tr>\n",
" <th>GOOG</th>\n",
" <td> 0.470756</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.390767</td>\n",
" <td> 0.443565</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IBM</th>\n",
" <td> 0.410074</td>\n",
" <td> 0.390767</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.495949</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <td> 0.424234</td>\n",
" <td> 0.443565</td>\n",
" <td> 0.495949</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 243,
"text": [
" AAPL GOOG IBM MSFT\n",
"AAPL 1.000000 0.470756 0.410074 0.424234\n",
"GOOG 0.470756 1.000000 0.390767 0.443565\n",
"IBM 0.410074 0.390767 1.000000 0.495949\n",
"MSFT 0.424234 0.443565 0.495949 1.000000"
]
}
],
"prompt_number": 243
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.cov()"
],
"language": "python",
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AAPL</th>\n",
" <th>GOOG</th>\n",
" <th>IBM</th>\n",
" <th>MSFT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>AAPL</th>\n",
" <td> 0.001027</td>\n",
" <td> 0.000303</td>\n",
" <td> 0.000252</td>\n",
" <td> 0.000309</td>\n",
" </tr>\n",
" <tr>\n",
" <th>GOOG</th>\n",
" <td> 0.000303</td>\n",
" <td> 0.000580</td>\n",
" <td> 0.000142</td>\n",
" <td> 0.000205</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IBM</th>\n",
" <td> 0.000252</td>\n",
" <td> 0.000142</td>\n",
" <td> 0.000367</td>\n",
" <td> 0.000216</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MSFT</th>\n",
" <td> 0.000309</td>\n",
" <td> 0.000205</td>\n",
" <td> 0.000216</td>\n",
" <td> 0.000516</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"output_type": "pyout",
"prompt_number": 244,
"text": [
" AAPL GOOG IBM MSFT\n",
"AAPL 0.001027 0.000303 0.000252 0.000309\n",
"GOOG 0.000303 0.000580 0.000142 0.000205\n",
"IBM 0.000252 0.000142 0.000367 0.000216\n",
"MSFT 0.000309 0.000205 0.000216 0.000516"
]
}
],
"prompt_number": 244
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Correlate each column of `returns` against its IBM column.\n",
"returns.corrwith(returns['IBM'])"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 245,
"text": [
"AAPL 0.410074\n",
"GOOG 0.390767\n",
"IBM 1.000000\n",
"MSFT 0.495949\n",
"dtype: float64"
]
}
],
"prompt_number": 245
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"returns.corrwith(volume)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 250,
"text": [
"AAPL -0.057495\n",
"GOOG 0.062644\n",
"IBM -0.007886\n",
"MSFT -0.014340\n",
"dtype: float64"
]
}
],
"prompt_number": 250
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])"
],
"language": "python",
"outputs": [],
"prompt_number": 251
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"uniques = obj.unique()\n",
"uniques"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 252,
"text": [
"array(['c', 'a', 'd', 'b'], dtype=object)"
]
}
],
"prompt_number": 252
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj.value_counts()"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 253,
"text": [
"c 3\n",
"a 3\n",
"b 2\n",
"d 1\n",
"dtype: int64"
]
}
],
"prompt_number": 253
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.value_counts(obj.values, sort=False)"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 254,
"text": [
"a 3\n",
"c 3\n",
"b 2\n",
"d 1\n",
"dtype: int64"
]
}
],
"prompt_number": 254
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"mask = obj.isin(['b', 'c'])"
],
"language": "python",
"outputs": [],
"prompt_number": 255
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"mask"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 256,
"text": [
"0 True\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 False\n",
"5 True\n",
"6 True\n",
"7 True\n",
"8 True\n",
"dtype: bool"
]
}
],
"prompt_number": 256
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj[mask]"
],
"language": "python",
"outputs": [
{
"output_type": "pyout",
"prompt_number": 257,
"text": [
"0 c\n",
"5 b\n",
"6 b\n",
"7 c\n",
"8 c\n",
"dtype: object"
]
}
],
"prompt_number": 257
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# NOTE(review): the saved cell stopped at `data =`, which is a SyntaxError\n",
"# on Restart & Run All. A small integer sample frame is filled in so the\n",
"# cell executes; replace the values if a different dataset was intended.\n",
"data = DataFrame({'Qu1': [1, 3, 4, 3, 4],\n",
"                  'Qu2': [2, 3, 1, 2, 3],\n",
"                  'Qu3': [1, 5, 2, 4, 4]})"
],
"language": "python",
"outputs": []
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment