Created
September 6, 2013 02:21
-
-
Save upepo/6458776 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "5. Getting Started with pandas" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# 5. Getting Started with pandas\n", | |
"\n", | |
"* It contains high-level data structures and manipulation tools designed to make data analysis fast and easy in Python.\n", | |
"* pandas is built on top of NumPy and makes it easy to use in NumPy-centric applications." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from pandas import Series, DataFrame\n", | |
"import pandas as pd \n", | |
"import numpy as np" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Series\n", | |
"\n", | |
"A Series is a one-dimensional array-like object containing an array of data (of any NumPy data type) and an associated array of data labels, called its index." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series([4,7,-5,3])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 167, | |
"text": [ | |
"0 4\n", | |
"1 7\n", | |
"2 -5\n", | |
"3 3\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 167 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.values" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"array([ 4, 7, -5, 3])" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"Int64Index([0, 1, 2, 3], dtype=int64)" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"dir(obj)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"['T',\n", | |
" '_AXIS_ALIASES',\n", | |
" '_AXIS_NAMES',\n", | |
" '_AXIS_NUMBERS',\n", | |
" '__abs__',\n", | |
" '__add__',\n", | |
" '__and__',\n", | |
" '__array__',\n", | |
" '__array_finalize__',\n", | |
" '__array_interface__',\n", | |
" '__array_prepare__',\n", | |
" '__array_priority__',\n", | |
" '__array_struct__',\n", | |
" '__array_wrap__',\n", | |
" '__bytes__',\n", | |
" '__class__',\n", | |
" '__contains__',\n", | |
" '__copy__',\n", | |
" '__deepcopy__',\n", | |
" '__delattr__',\n", | |
" '__delitem__',\n", | |
" '__delslice__',\n", | |
" '__dict__',\n", | |
" '__div__',\n", | |
" '__divmod__',\n", | |
" '__doc__',\n", | |
" '__eq__',\n", | |
" '__float__',\n", | |
" '__floordiv__',\n", | |
" '__format__',\n", | |
" '__ge__',\n", | |
" '__getattribute__',\n", | |
" '__getitem__',\n", | |
" '__getslice__',\n", | |
" '__gt__',\n", | |
" '__hash__',\n", | |
" '__hex__',\n", | |
" '__iadd__',\n", | |
" '__iand__',\n", | |
" '__idiv__',\n", | |
" '__ifloordiv__',\n", | |
" '__ilshift__',\n", | |
" '__imod__',\n", | |
" '__imul__',\n", | |
" '__index__',\n", | |
" '__init__',\n", | |
" '__int__',\n", | |
" '__invert__',\n", | |
" '__ior__',\n", | |
" '__ipow__',\n", | |
" '__irshift__',\n", | |
" '__isub__',\n", | |
" '__iter__',\n", | |
" '__itruediv__',\n", | |
" '__ixor__',\n", | |
" '__le__',\n", | |
" '__len__',\n", | |
" '__long__',\n", | |
" '__lshift__',\n", | |
" '__lt__',\n", | |
" '__mod__',\n", | |
" '__module__',\n", | |
" '__mul__',\n", | |
" '__ne__',\n", | |
" '__neg__',\n", | |
" '__new__',\n", | |
" '__nonzero__',\n", | |
" '__oct__',\n", | |
" '__or__',\n", | |
" '__pos__',\n", | |
" '__pow__',\n", | |
" '__radd__',\n", | |
" '__rand__',\n", | |
" '__rdiv__',\n", | |
" '__rdivmod__',\n", | |
" '__reduce__',\n", | |
" '__reduce_ex__',\n", | |
" '__repr__',\n", | |
" '__rfloordiv__',\n", | |
" '__rlshift__',\n", | |
" '__rmod__',\n", | |
" '__rmul__',\n", | |
" '__ror__',\n", | |
" '__rpow__',\n", | |
" '__rrshift__',\n", | |
" '__rshift__',\n", | |
" '__rsub__',\n", | |
" '__rtruediv__',\n", | |
" '__rxor__',\n", | |
" '__setattr__',\n", | |
" '__setitem__',\n", | |
" '__setslice__',\n", | |
" '__setstate__',\n", | |
" '__sizeof__',\n", | |
" '__str__',\n", | |
" '__sub__',\n", | |
" '__subclasshook__',\n", | |
" '__truediv__',\n", | |
" '__unicode__',\n", | |
" '__weakref__',\n", | |
" '__xor__',\n", | |
" '_agg_by_level',\n", | |
" '_at',\n", | |
" '_binop',\n", | |
" '_can_hold_na',\n", | |
" '_constructor',\n", | |
" '_create_indexer',\n", | |
" '_get_axis',\n", | |
" '_get_axis_name',\n", | |
" '_get_axis_number',\n", | |
" '_get_repr',\n", | |
" '_get_val_at',\n", | |
" '_get_values',\n", | |
" '_get_values_tuple',\n", | |
" '_get_with',\n", | |
" '_iat',\n", | |
" '_iloc',\n", | |
" '_index',\n", | |
" '_is_mixed_type',\n", | |
" '_ix',\n", | |
" '_ixs',\n", | |
" '_loc',\n", | |
" '_make_time_series',\n", | |
" '_reindex_indexer',\n", | |
" '_repr_footer',\n", | |
" '_set_labels',\n", | |
" '_set_values',\n", | |
" '_set_with',\n", | |
" '_slice',\n", | |
" '_tidy_repr',\n", | |
" '_xs',\n", | |
" 'abs',\n", | |
" 'add',\n", | |
" 'align',\n", | |
" 'all',\n", | |
" 'any',\n", | |
" 'append',\n", | |
" 'apply',\n", | |
" 'argmax',\n", | |
" 'argmin',\n", | |
" 'argsort',\n", | |
" 'asfreq',\n", | |
" 'asof',\n", | |
" 'astype',\n", | |
" 'at',\n", | |
" 'at_time',\n", | |
" 'autocorr',\n", | |
" 'axes',\n", | |
" 'base',\n", | |
" 'between',\n", | |
" 'between_time',\n", | |
" 'bfill',\n", | |
" 'byteswap',\n", | |
" 'choose',\n", | |
" 'clip',\n", | |
" 'clip_lower',\n", | |
" 'clip_upper',\n", | |
" 'combine',\n", | |
" 'combine_first',\n", | |
" 'compress',\n", | |
" 'conj',\n", | |
" 'conjugate',\n", | |
" 'convert_objects',\n", | |
" 'copy',\n", | |
" 'corr',\n", | |
" 'count',\n", | |
" 'cov',\n", | |
" 'ctypes',\n", | |
" 'cummax',\n", | |
" 'cummin',\n", | |
" 'cumprod',\n", | |
" 'cumsum',\n", | |
" 'data',\n", | |
" 'describe',\n", | |
" 'diagonal',\n", | |
" 'diff',\n", | |
" 'div',\n", | |
" 'dot',\n", | |
" 'drop',\n", | |
" 'drop_duplicates',\n", | |
" 'dropna',\n", | |
" 'dtype',\n", | |
" 'dump',\n", | |
" 'dumps',\n", | |
" 'duplicated',\n", | |
" 'ffill',\n", | |
" 'fill',\n", | |
" 'fillna',\n", | |
" 'first',\n", | |
" 'first_valid_index',\n", | |
" 'flags',\n", | |
" 'flat',\n", | |
" 'flatten',\n", | |
" 'from_array',\n", | |
" 'from_csv',\n", | |
" 'get',\n", | |
" 'get_dtype_counts',\n", | |
" 'get_value',\n", | |
" 'getfield',\n", | |
" 'groupby',\n", | |
" 'head',\n", | |
" 'hist',\n", | |
" 'iat',\n", | |
" 'idxmax',\n", | |
" 'idxmin',\n", | |
" 'iget',\n", | |
" 'iget_value',\n", | |
" 'iloc',\n", | |
" 'imag',\n", | |
" 'index',\n", | |
" 'interpolate',\n", | |
" 'irow',\n", | |
" 'isin',\n", | |
" 'isnull',\n", | |
" 'item',\n", | |
" 'itemset',\n", | |
" 'itemsize',\n", | |
" 'iteritems',\n", | |
" 'iterkv',\n", | |
" 'ix',\n", | |
" 'keys',\n", | |
" 'kurt',\n", | |
" 'last',\n", | |
" 'last_valid_index',\n", | |
" 'load',\n", | |
" 'loc',\n", | |
" 'mad',\n", | |
" 'map',\n", | |
" 'mask',\n", | |
" 'max',\n", | |
" 'mean',\n", | |
" 'median',\n", | |
" 'min',\n", | |
" 'mul',\n", | |
" 'name',\n", | |
" 'nbytes',\n", | |
" 'ndim',\n", | |
" 'newbyteorder',\n", | |
" 'nonzero',\n", | |
" 'notnull',\n", | |
" 'nunique',\n", | |
" 'order',\n", | |
" 'pct_change',\n", | |
" 'plot',\n", | |
" 'prod',\n", | |
" 'ptp',\n", | |
" 'put',\n", | |
" 'quantile',\n", | |
" 'rank',\n", | |
" 'ravel',\n", | |
" 'real',\n", | |
" 'reindex',\n", | |
" 'reindex_axis',\n", | |
" 'reindex_like',\n", | |
" 'rename',\n", | |
" 'reorder_levels',\n", | |
" 'repeat',\n", | |
" 'replace',\n", | |
" 'resample',\n", | |
" 'reset_index',\n", | |
" 'reshape',\n", | |
" 'resize',\n", | |
" 'round',\n", | |
" 'save',\n", | |
" 'searchsorted',\n", | |
" 'select',\n", | |
" 'set_value',\n", | |
" 'setasflat',\n", | |
" 'setfield',\n", | |
" 'setflags',\n", | |
" 'shape',\n", | |
" 'shift',\n", | |
" 'size',\n", | |
" 'skew',\n", | |
" 'sort',\n", | |
" 'sort_index',\n", | |
" 'sortlevel',\n", | |
" 'squeeze',\n", | |
" 'std',\n", | |
" 'str',\n", | |
" 'strides',\n", | |
" 'sub',\n", | |
" 'sum',\n", | |
" 'swapaxes',\n", | |
" 'swaplevel',\n", | |
" 'tail',\n", | |
" 'take',\n", | |
" 'to_csv',\n", | |
" 'to_dict',\n", | |
" 'to_hdf',\n", | |
" 'to_sparse',\n", | |
" 'to_string',\n", | |
" 'tofile',\n", | |
" 'tolist',\n", | |
" 'tostring',\n", | |
" 'trace',\n", | |
" 'transpose',\n", | |
" 'truncate',\n", | |
" 'tshift',\n", | |
" 'tz_convert',\n", | |
" 'tz_localize',\n", | |
" 'unique',\n", | |
" 'unstack',\n", | |
" 'update',\n", | |
" 'valid',\n", | |
" 'value_counts',\n", | |
" 'values',\n", | |
" 'var',\n", | |
" 'view',\n", | |
" 'weekday',\n", | |
" 'where']" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2 = Series([4,7,-5,3], index=['d','b','a','c'])\n", | |
"obj2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"d 4\n", | |
"b 7\n", | |
"a -5\n", | |
"c 3\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2.index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"Index([d, b, a, c], dtype=object)" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print obj2['a']\n", | |
"obj2['d']=6\n", | |
"obj2[['c','a','d']]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"-5\n" | |
] | |
}, | |
{ | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
"c 3\n", | |
"a -5\n", | |
"d 6\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2[obj2>0]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 170, | |
"text": [ | |
"b 7.2\n", | |
"c 3.6\n", | |
"d 4.5\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 170 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2*2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"d 12\n", | |
"b 14\n", | |
"a -10\n", | |
"c 6\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"np.exp(obj2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 175, | |
"text": [ | |
"a 0.004992\n", | |
"b 1339.430764\n", | |
"c 36.598234\n", | |
"d 90.017131\n", | |
"e NaN\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 175 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print 'b' in obj2\n", | |
"print 'e' in obj2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"True\n", | |
"False\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#python dict to Series\n", | |
"sdata = {'Ohio':35000, 'Texas':71000, 'Oregon':16000, 'Utah':5000}\n", | |
"obj3 = Series(sdata)\n", | |
"obj3" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
"Ohio 35000\n", | |
"Oregon 16000\n", | |
"Texas 71000\n", | |
"Utah 5000\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"states=['California', 'Ohio', 'Oregon', 'Texas']\n", | |
"obj4 = Series(sdata, index=states)\n", | |
"obj4" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"California NaN\n", | |
"Ohio 35000\n", | |
"Oregon 16000\n", | |
"Texas 71000\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print pd.isnull(obj4)\n", | |
"print '----'\n", | |
"print pd.notnull(obj4)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"California True\n", | |
"Ohio False\n", | |
"Oregon False\n", | |
"Texas False\n", | |
"dtype: bool\n", | |
"----\n", | |
"California False\n", | |
"Ohio True\n", | |
"Oregon True\n", | |
"Texas True\n", | |
"dtype: bool\n" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj3+obj4" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 17, | |
"text": [ | |
"California NaN\n", | |
"Ohio 70000\n", | |
"Oregon 32000\n", | |
"Texas 142000\n", | |
"Utah NaN\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj4" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 18, | |
"text": [ | |
"California NaN\n", | |
"Ohio 35000\n", | |
"Oregon 16000\n", | |
"Texas 71000\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj4.name = 'population'\n", | |
"obj4.index.name = 'state'\n", | |
"obj4" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 19, | |
"text": [ | |
"state\n", | |
"California NaN\n", | |
"Ohio 35000\n", | |
"Oregon 16000\n", | |
"Texas 71000\n", | |
"Name: population, dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 19 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.index=['Bob','Steve','Jeff','Ryan']\n", | |
"obj\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 178, | |
"text": [ | |
"Bob 4\n", | |
"Steve 7\n", | |
"Jeff -5\n", | |
"Ryan 3\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 178 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## DataFrame\n", | |
"\n", | |
"* A DataFrame represents a tabular, spreadsheet-like data structure containing an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.). \n", | |
"* The DataFrame has both a row and column index; it can be thought of as a dict of Series (one for all sharing the same index). \n", | |
"* Compared with other such DataFrame-like structures you may have used before (like R\u2019s data.frame), row-oriented and column-oriented operations in DataFrame are treated roughly symmetrically. \n", | |
"* Under the hood, the data is stored as one or more two-dimensional blocks rather than a list, dict, or some other collection of one-dimensional arrays." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = {'state':['Ohio','Ohio','Ohio', 'Nevada', 'Nevada'],\n", | |
" 'year':[2000,2001,2002,2001,2002],\n", | |
" 'pop':[1.5,1.7,3.6,2.4,2.9]}\n", | |
"frame = DataFrame(data)\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>pop</th>\n", | |
" <th>state</th>\n", | |
" <th>year</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 1.5</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 2000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1.7</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 2001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 3.6</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 2002</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2.4</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2001</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 2.9</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2002</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 186, | |
"text": [ | |
" pop state year\n", | |
"0 1.5 Ohio 2000\n", | |
"1 1.7 Ohio 2001\n", | |
"2 3.6 Ohio 2002\n", | |
"3 2.4 Nevada 2001\n", | |
"4 2.9 Nevada 2002" | |
] | |
} | |
], | |
"prompt_number": 186 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"DataFrame(data, columns=['year','state','pop'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 187, | |
"text": [ | |
" year state pop\n", | |
"0 2000 Ohio 1.5\n", | |
"1 2001 Ohio 1.7\n", | |
"2 2002 Ohio 3.6\n", | |
"3 2001 Nevada 2.4\n", | |
"4 2002 Nevada 2.9" | |
] | |
} | |
], | |
"prompt_number": 187 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2= DataFrame(data, columns=['year', 'state','pop', 'debt'], \n", | |
" index=['one','two','three','four','five'])\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 188, | |
"text": [ | |
" year state pop debt\n", | |
"one 2000 Ohio 1.5 NaN\n", | |
"two 2001 Ohio 1.7 NaN\n", | |
"three 2002 Ohio 3.6 NaN\n", | |
"four 2001 Nevada 2.4 NaN\n", | |
"five 2002 Nevada 2.9 NaN" | |
] | |
} | |
], | |
"prompt_number": 188 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2['state']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 197, | |
"text": [ | |
"one Ohio\n", | |
"two Ohio\n", | |
"three Ohio\n", | |
"four Nevada\n", | |
"five Nevada\n", | |
"Name: state, dtype: object" | |
] | |
} | |
], | |
"prompt_number": 197 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2.ix['three']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 201, | |
"text": [ | |
"year 2002\n", | |
"state Ohio\n", | |
"pop 3.6\n", | |
"debt NaN\n", | |
"Name: three, dtype: object" | |
] | |
} | |
], | |
"prompt_number": 201 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2['debt']=16.5\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> 16.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td> 16.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> 16.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td> 16.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td> 16.5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 28, | |
"text": [ | |
" year state pop debt\n", | |
"one 2000 Ohio 1.5 16.5\n", | |
"two 2001 Ohio 1.7 16.5\n", | |
"three 2002 Ohio 3.6 16.5\n", | |
"four 2001 Nevada 2.4 16.5\n", | |
"five 2002 Nevada 2.9 16.5" | |
] | |
} | |
], | |
"prompt_number": 28 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2['debt'] = np.arange(5.)\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 29, | |
"text": [ | |
" year state pop debt\n", | |
"one 2000 Ohio 1.5 0\n", | |
"two 2001 Ohio 1.7 1\n", | |
"three 2002 Ohio 3.6 2\n", | |
"four 2001 Nevada 2.4 3\n", | |
"five 2002 Nevada 2.9 4" | |
] | |
} | |
], | |
"prompt_number": 29 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"np.arange(5.)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 30, | |
"text": [ | |
"array([ 0., 1., 2., 3., 4.])" | |
] | |
} | |
], | |
"prompt_number": 30 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])\n", | |
"frame2['debt'] = val\n", | |
"frame2 " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td>-1.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td>-1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td>-1.7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 31, | |
"text": [ | |
" year state pop debt\n", | |
"one 2000 Ohio 1.5 NaN\n", | |
"two 2001 Ohio 1.7 -1.2\n", | |
"three 2002 Ohio 3.6 NaN\n", | |
"four 2001 Nevada 2.4 -1.5\n", | |
"five 2002 Nevada 2.9 -1.7" | |
] | |
} | |
], | |
"prompt_number": 31 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2['eastern'] = frame2.state == 'Ohio'\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" <th>eastern</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> NaN</td>\n", | |
" <td> True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td>-1.2</td>\n", | |
" <td> True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> NaN</td>\n", | |
" <td> True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td>-1.5</td>\n", | |
" <td> False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td>-1.7</td>\n", | |
" <td> False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 35, | |
"text": [ | |
" year state pop debt eastern\n", | |
"one 2000 Ohio 1.5 NaN True\n", | |
"two 2001 Ohio 1.7 -1.2 True\n", | |
"three 2002 Ohio 3.6 NaN True\n", | |
"four 2001 Nevada 2.4 -1.5 False\n", | |
"five 2002 Nevada 2.9 -1.7 False" | |
] | |
} | |
], | |
"prompt_number": 35 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"del frame2['eastern']\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>year</th>\n", | |
" <th>state</th>\n", | |
" <th>pop</th>\n", | |
" <th>debt</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 2000</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.5</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>two</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 1.7</td>\n", | |
" <td>-1.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Ohio</td>\n", | |
" <td> 3.6</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>four</th>\n", | |
" <td> 2001</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.4</td>\n", | |
" <td>-1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>five</th>\n", | |
" <td> 2002</td>\n", | |
" <td> Nevada</td>\n", | |
" <td> 2.9</td>\n", | |
" <td>-1.7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 36, | |
"text": [ | |
" year state pop debt\n", | |
"one 2000 Ohio 1.5 NaN\n", | |
"two 2001 Ohio 1.7 -1.2\n", | |
"three 2002 Ohio 3.6 NaN\n", | |
"four 2001 Nevada 2.4 -1.5\n", | |
"five 2002 Nevada 2.9 -1.7" | |
] | |
} | |
], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pop = {'Nevada': {2001: 2.4, 2002: 2.9},\n", | |
" 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n", | |
"frame3 = DataFrame(pop)\n", | |
"frame3" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Nevada</th>\n", | |
" <th>Ohio</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2000</th>\n", | |
" <td> NaN</td>\n", | |
" <td> 1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2001</th>\n", | |
" <td> 2.4</td>\n", | |
" <td> 1.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2002</th>\n", | |
" <td> 2.9</td>\n", | |
" <td> 3.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 205, | |
"text": [ | |
" Nevada Ohio\n", | |
"2000 NaN 1.5\n", | |
"2001 2.4 1.7\n", | |
"2002 2.9 3.6" | |
] | |
} | |
], | |
"prompt_number": 205 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame3.T" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>2000</th>\n", | |
" <th>2001</th>\n", | |
" <th>2002</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Nevada</th>\n", | |
" <td> NaN</td>\n", | |
" <td> 2.4</td>\n", | |
" <td> 2.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 1.5</td>\n", | |
" <td> 1.7</td>\n", | |
" <td> 3.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 38, | |
"text": [ | |
" 2000 2001 2002\n", | |
"Nevada NaN 2.4 2.9\n", | |
"Ohio 1.5 1.7 3.6" | |
] | |
} | |
], | |
"prompt_number": 38 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"DataFrame(pop, index=[2001,2002,2003])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Nevada</th>\n", | |
" <th>Ohio</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2001</th>\n", | |
" <td> 2.4</td>\n", | |
" <td> 1.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2002</th>\n", | |
" <td> 2.9</td>\n", | |
" <td> 3.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2003</th>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 39, | |
"text": [ | |
" Nevada Ohio\n", | |
"2001 2.4 1.7\n", | |
"2002 2.9 3.6\n", | |
"2003 NaN NaN" | |
] | |
} | |
], | |
"prompt_number": 39 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pdata = {'Ohio': frame3['Ohio'][:-1],\n", | |
" 'Nevada': frame3['Nevada'][:2]}\n", | |
"DataFrame(pdata)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Nevada</th>\n", | |
" <th>Ohio</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2000</th>\n", | |
" <td> NaN</td>\n", | |
" <td> 1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2001</th>\n", | |
" <td> 2.4</td>\n", | |
" <td> 1.7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 40, | |
"text": [ | |
" Nevada Ohio\n", | |
"2000 NaN 1.5\n", | |
"2001 2.4 1.7" | |
] | |
} | |
], | |
"prompt_number": 40 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame3.index.name = 'year'\n", | |
"frame3.columns.name = 'state'\n", | |
"frame3" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>state</th>\n", | |
" <th>Nevada</th>\n", | |
" <th>Ohio</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>year</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2000</th>\n", | |
" <td> NaN</td>\n", | |
" <td> 1.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2001</th>\n", | |
" <td> 2.4</td>\n", | |
" <td> 1.7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2002</th>\n", | |
" <td> 2.9</td>\n", | |
" <td> 3.6</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 41, | |
"text": [ | |
"state Nevada Ohio\n", | |
"year \n", | |
"2000 NaN 1.5\n", | |
"2001 2.4 1.7\n", | |
"2002 2.9 3.6" | |
] | |
} | |
], | |
"prompt_number": 41 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame3.values" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 208, | |
"text": [ | |
"array([[ nan, 1.5],\n", | |
" [ 2.4, 1.7],\n", | |
" [ 2.9, 3.6]])" | |
] | |
} | |
], | |
"prompt_number": 208 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2.values" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 43, | |
"text": [ | |
"array([[2000, Ohio, 1.5, nan],\n", | |
" [2001, Ohio, 1.7, -1.2],\n", | |
" [2002, Ohio, 3.6, nan],\n", | |
" [2001, Nevada, 2.4, -1.5],\n", | |
" [2002, Nevada, 2.9, -1.7]], dtype=object)" | |
] | |
} | |
], | |
"prompt_number": 43 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Index Objects\n", | |
"\n", | |
"* pandas\u2019s Index objects are responsible for holding the axis labels and other metadata (like the axis name or names).\n", | |
"* Any array or other sequence of labels used when constructing a Series or DataFrame is internally converted to an Index:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(range(3), index=['a', 'b', 'c'])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 209, | |
"text": [ | |
"a 0\n", | |
"b 1\n", | |
"c 2\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 209 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"index = obj.index\n", | |
"index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 210, | |
"text": [ | |
"Index([a, b, c], dtype=object)" | |
] | |
} | |
], | |
"prompt_number": 210 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"index[1:]\n", | |
"\n", | |
"#index=[1,2,3]\n", | |
"#index\n", | |
"#obj.index = index\n", | |
"#obj.index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 217, | |
"text": [ | |
"Int64Index([1, 2, 3], dtype=int64)" | |
] | |
} | |
], | |
"prompt_number": 217 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"index[1] = 'd'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "Exception", | |
"evalue": "<class 'pandas.core.index.Index'> object is immutable", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mException\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-212-676fdeb26a68>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mindex\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'd'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/Library/Python/2.7/site-packages/pandas/core/index.pyc\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 369\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m' object is immutable'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 370\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 371\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mException\u001b[0m: <class 'pandas.core.index.Index'> object is immutable" | |
] | |
} | |
], | |
"prompt_number": 212 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Immutability is important so that Index objects can be safely shared among data structures:\n", | |
"index = pd.Index(np.arange(3))\n", | |
"obj2 = Series([1.5, -2.5, 0], index=index)\n", | |
"obj2 " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 48, | |
"text": [ | |
"0 1.5\n", | |
"1 -2.5\n", | |
"2 0.0\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 48 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2.index is index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 219, | |
"text": [ | |
"False" | |
] | |
} | |
], | |
"prompt_number": 219 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"dir(frame3.index)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 50, | |
"text": [ | |
"['T',\n", | |
" '__abs__',\n", | |
" '__add__',\n", | |
" '__and__',\n", | |
" '__array__',\n", | |
" '__array_finalize__',\n", | |
" '__array_interface__',\n", | |
" '__array_prepare__',\n", | |
" '__array_priority__',\n", | |
" '__array_struct__',\n", | |
" '__array_wrap__',\n", | |
" '__bytes__',\n", | |
" '__class__',\n", | |
" '__contains__',\n", | |
" '__copy__',\n", | |
" '__deepcopy__',\n", | |
" '__delattr__',\n", | |
" '__delitem__',\n", | |
" '__delslice__',\n", | |
" '__dict__',\n", | |
" '__div__',\n", | |
" '__divmod__',\n", | |
" '__doc__',\n", | |
" '__eq__',\n", | |
" '__float__',\n", | |
" '__floordiv__',\n", | |
" '__format__',\n", | |
" '__ge__',\n", | |
" '__getattribute__',\n", | |
" '__getitem__',\n", | |
" '__getslice__',\n", | |
" '__gt__',\n", | |
" '__hash__',\n", | |
" '__hex__',\n", | |
" '__iadd__',\n", | |
" '__iand__',\n", | |
" '__idiv__',\n", | |
" '__ifloordiv__',\n", | |
" '__ilshift__',\n", | |
" '__imod__',\n", | |
" '__imul__',\n", | |
" '__index__',\n", | |
" '__init__',\n", | |
" '__int__',\n", | |
" '__invert__',\n", | |
" '__ior__',\n", | |
" '__ipow__',\n", | |
" '__irshift__',\n", | |
" '__isub__',\n", | |
" '__iter__',\n", | |
" '__itruediv__',\n", | |
" '__ixor__',\n", | |
" '__le__',\n", | |
" '__len__',\n", | |
" '__long__',\n", | |
" '__lshift__',\n", | |
" '__lt__',\n", | |
" '__mod__',\n", | |
" '__module__',\n", | |
" '__mul__',\n", | |
" '__ne__',\n", | |
" '__neg__',\n", | |
" '__new__',\n", | |
" '__nonzero__',\n", | |
" '__oct__',\n", | |
" '__or__',\n", | |
" '__pos__',\n", | |
" '__pow__',\n", | |
" '__radd__',\n", | |
" '__rand__',\n", | |
" '__rdiv__',\n", | |
" '__rdivmod__',\n", | |
" '__reduce__',\n", | |
" '__reduce_ex__',\n", | |
" '__repr__',\n", | |
" '__rfloordiv__',\n", | |
" '__rlshift__',\n", | |
" '__rmod__',\n", | |
" '__rmul__',\n", | |
" '__ror__',\n", | |
" '__rpow__',\n", | |
" '__rrshift__',\n", | |
" '__rshift__',\n", | |
" '__rsub__',\n", | |
" '__rtruediv__',\n", | |
" '__rxor__',\n", | |
" '__setattr__',\n", | |
" '__setitem__',\n", | |
" '__setslice__',\n", | |
" '__setstate__',\n", | |
" '__sizeof__',\n", | |
" '__str__',\n", | |
" '__sub__',\n", | |
" '__subclasshook__',\n", | |
" '__truediv__',\n", | |
" '__unicode__',\n", | |
" '__xor__',\n", | |
" '_array_values',\n", | |
" '_arrmap',\n", | |
" '_assert_can_do_setop',\n", | |
" '_box_scalars',\n", | |
" '_cache',\n", | |
" '_cleanup',\n", | |
" '_constructor',\n", | |
" '_engine',\n", | |
" '_engine_type',\n", | |
" '_ensure_compat_concat',\n", | |
" '_format_native_types',\n", | |
" '_format_with_header',\n", | |
" '_get_duplicates',\n", | |
" '_get_level_number',\n", | |
" '_get_method',\n", | |
" '_get_names',\n", | |
" '_groupby',\n", | |
" '_has_complex_internals',\n", | |
" '_inner_indexer',\n", | |
" '_join_level',\n", | |
" '_join_monotonic',\n", | |
" '_join_non_unique',\n", | |
" '_join_precedence',\n", | |
" '_left_indexer',\n", | |
" '_left_indexer_unique',\n", | |
" '_mpl_repr',\n", | |
" '_outer_indexer',\n", | |
" '_possibly_promote',\n", | |
" '_set_names',\n", | |
" '_shallow_copy',\n", | |
" '_wrap_joined_index',\n", | |
" '_wrap_union_result',\n", | |
" 'all',\n", | |
" 'any',\n", | |
" 'append',\n", | |
" 'argmax',\n", | |
" 'argmin',\n", | |
" 'argsort',\n", | |
" 'asi8',\n", | |
" 'asof',\n", | |
" 'asof_locs',\n", | |
" 'astype',\n", | |
" 'base',\n", | |
" 'byteswap',\n", | |
" 'choose',\n", | |
" 'clip',\n", | |
" 'compress',\n", | |
" 'conj',\n", | |
" 'conjugate',\n", | |
" 'copy',\n", | |
" 'ctypes',\n", | |
" 'cumprod',\n", | |
" 'cumsum',\n", | |
" 'data',\n", | |
" 'delete',\n", | |
" 'diagonal',\n", | |
" 'diff',\n", | |
" 'dot',\n", | |
" 'drop',\n", | |
" 'dtype',\n", | |
" 'dump',\n", | |
" 'dumps',\n", | |
" 'equals',\n", | |
" 'fill',\n", | |
" 'flags',\n", | |
" 'flat',\n", | |
" 'flatten',\n", | |
" 'format',\n", | |
" 'get_duplicates',\n", | |
" 'get_indexer',\n", | |
" 'get_level_values',\n", | |
" 'get_loc',\n", | |
" 'get_value',\n", | |
" 'getfield',\n", | |
" 'groupby',\n", | |
" 'holds_integer',\n", | |
" 'imag',\n", | |
" 'inferred_type',\n", | |
" 'insert',\n", | |
" 'intersection',\n", | |
" 'is_all_dates',\n", | |
" 'is_lexsorted_for_tuple',\n", | |
" 'is_monotonic',\n", | |
" 'is_numeric',\n", | |
" 'is_type_compatible',\n", | |
" 'is_unique',\n", | |
" 'isin',\n", | |
" 'item',\n", | |
" 'itemset',\n", | |
" 'itemsize',\n", | |
" 'join',\n", | |
" 'map',\n", | |
" 'max',\n", | |
" 'mean',\n", | |
" 'min',\n", | |
" 'name',\n", | |
" 'names',\n", | |
" 'nbytes',\n", | |
" 'ndim',\n", | |
" 'newbyteorder',\n", | |
" 'nlevels',\n", | |
" 'nonzero',\n", | |
" 'order',\n", | |
" 'prod',\n", | |
" 'ptp',\n", | |
" 'put',\n", | |
" 'ravel',\n", | |
" 'real',\n", | |
" 'reindex',\n", | |
" 'repeat',\n", | |
" 'reshape',\n", | |
" 'resize',\n", | |
" 'round',\n", | |
" 'searchsorted',\n", | |
" 'set_value',\n", | |
" 'setasflat',\n", | |
" 'setfield',\n", | |
" 'setflags',\n", | |
" 'shape',\n", | |
" 'shift',\n", | |
" 'size',\n", | |
" 'slice_indexer',\n", | |
" 'slice_locs',\n", | |
" 'sort',\n", | |
" 'squeeze',\n", | |
" 'std',\n", | |
" 'strides',\n", | |
" 'sum',\n", | |
" 'summary',\n", | |
" 'swapaxes',\n", | |
" 'take',\n", | |
" 'to_datetime',\n", | |
" 'to_native_types',\n", | |
" 'to_series',\n", | |
" 'tofile',\n", | |
" 'tolist',\n", | |
" 'tostring',\n", | |
" 'trace',\n", | |
" 'transpose',\n", | |
" 'union',\n", | |
" 'unique',\n", | |
" 'values',\n", | |
" 'var',\n", | |
" 'view']" | |
] | |
} | |
], | |
"prompt_number": 50 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Essential Functionality\n", | |
"\n", | |
"### Reindexing\n", | |
"\n", | |
"A critical method on pandas objects is reindex, which means to create a new object with the data conformed to a new index." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 51, | |
"text": [ | |
"d 4.5\n", | |
"b 7.2\n", | |
"a -5.3\n", | |
"c 3.6\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 51 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])\n", | |
"obj2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 52, | |
"text": [ | |
"a -5.3\n", | |
"b 7.2\n", | |
"c 3.6\n", | |
"d 4.5\n", | |
"e NaN\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 52 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 53, | |
"text": [ | |
"a -5.3\n", | |
"b 7.2\n", | |
"c 3.6\n", | |
"d 4.5\n", | |
"e 0.0\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 53 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 5])\n", | |
"obj3" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 220, | |
"text": [ | |
"0 blue\n", | |
"2 purple\n", | |
"5 yellow\n", | |
"dtype: object" | |
] | |
} | |
], | |
"prompt_number": 220 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj3.reindex(range(6), method='ffill')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 221, | |
"text": [ | |
"0 blue\n", | |
"1 blue\n", | |
"2 purple\n", | |
"3 purple\n", | |
"4 purple\n", | |
"5 yellow\n", | |
"dtype: object" | |
] | |
} | |
], | |
"prompt_number": 221 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'],\n", | |
" columns=['Ohio', 'Texas', 'California'])\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Ohio</th>\n", | |
" <th>Texas</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 223, | |
"text": [ | |
" Ohio Texas California\n", | |
"a 0 1 2\n", | |
"c 3 4 5\n", | |
"d 6 7 8" | |
] | |
} | |
], | |
"prompt_number": 223 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# With DataFrame, reindex can alter either the (row) index, columns, or both. \n", | |
"# When passed just a sequence, the rows are reindexed in the result:\n", | |
"\n", | |
"frame2 = frame.reindex(['a', 'b', 'c', 'd'])\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Ohio</th>\n", | |
" <th>Texas</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 226, | |
"text": [ | |
" Ohio Texas California\n", | |
"a 0 1 2\n", | |
"b NaN NaN NaN\n", | |
"c 3 4 5\n", | |
"d 6 7 8" | |
] | |
} | |
], | |
"prompt_number": 226 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"states = ['Texas', 'Utah', 'California']\n", | |
"frame.reindex(columns=states)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Texas</th>\n", | |
" <th>Utah</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 1</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 4</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 7</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 60, | |
"text": [ | |
" Texas Utah California\n", | |
"a 1 NaN 2\n", | |
"c 4 NaN 5\n", | |
"d 7 NaN 8" | |
] | |
} | |
], | |
"prompt_number": 60 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill', columns=states)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Texas</th>\n", | |
" <th>Utah</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 1</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 1</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 4</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 7</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 230, | |
"text": [ | |
" Texas Utah California\n", | |
"a 1 NaN 2\n", | |
"b 1 NaN 2\n", | |
"c 4 NaN 5\n", | |
"d 7 NaN 8" | |
] | |
} | |
], | |
"prompt_number": 230 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Ohio</th>\n", | |
" <th>Texas</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 63, | |
"text": [ | |
" Ohio Texas California\n", | |
"a 0 1 2\n", | |
"c 3 4 5\n", | |
"d 6 7 8" | |
] | |
} | |
], | |
"prompt_number": 63 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.ix[['a', 'b', 'c', 'd'], states]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Texas</th>\n", | |
" <th>Utah</th>\n", | |
" <th>California</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 1</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 4</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 7</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 66, | |
"text": [ | |
" Texas Utah California\n", | |
"a 1 NaN 2\n", | |
"b NaN NaN NaN\n", | |
"c 4 NaN 5\n", | |
"d 7 NaN 8" | |
] | |
} | |
], | |
"prompt_number": 66 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Dropping entries from an axis\n", | |
"\n", | |
"Dropping one or more entries from an axis is easy if you have an index array or list without those entries. As that can require a bit of munging and set logic, the drop method will return a new object with the indicated value or values deleted from an axis:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])\n", | |
"new_obj = obj.drop('c')\n", | |
"new_obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 232, | |
"text": [ | |
"a 0\n", | |
"b 1\n", | |
"d 3\n", | |
"e 4\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 232 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.drop(['d', 'c'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 68, | |
"text": [ | |
"a 0\n", | |
"b 1\n", | |
"e 4\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 68 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = DataFrame(np.arange(16).reshape((4, 4)),\n", | |
" index=['Ohio', 'Colorado', 'Utah', 'New York'],\n", | |
" columns=['one', 'two', 'three', 'four'])\n", | |
"data.drop(['Colorado', 'Ohio'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" <th>three</th>\n", | |
" <th>four</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 8</td>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td> 12</td>\n", | |
" <td> 13</td>\n", | |
" <td> 14</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 234, | |
"text": [ | |
" one two three four\n", | |
"Utah 8 9 10 11\n", | |
"New York 12 13 14 15" | |
] | |
} | |
], | |
"prompt_number": 234 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.drop('two', axis=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>three</th>\n", | |
" <th>four</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Colorado</th>\n", | |
" <td> 4</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 8</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td> 12</td>\n", | |
" <td> 14</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 70, | |
"text": [ | |
" one three four\n", | |
"Ohio 0 2 3\n", | |
"Colorado 4 6 7\n", | |
"Utah 8 10 11\n", | |
"New York 12 14 15" | |
] | |
} | |
], | |
"prompt_number": 70 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Indexing, selection, and filtering\n", | |
"\n", | |
"Series indexing (obj[...]) works analogously to NumPy array indexing, except you can use the Series\u2019s index values instead of only integers. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 71, | |
"text": [ | |
"a 0\n", | |
"b 1\n", | |
"c 2\n", | |
"d 3\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 71 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print obj['b']\n", | |
"print \n", | |
"print obj[1]\n", | |
"print \n", | |
"print type(obj[2])\n", | |
"print\n", | |
"print type(obj[2:4])\n", | |
"print \n", | |
"print obj[['b','a','d']]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1.0\n", | |
"\n", | |
"1.0\n", | |
"\n", | |
"<type 'numpy.float64'>\n", | |
"\n", | |
"<class 'pandas.core.series.Series'>\n", | |
"\n", | |
"b 1\n", | |
"a 0\n", | |
"d 3\n", | |
"dtype: float64\n" | |
] | |
} | |
], | |
"prompt_number": 236 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj['b':'c'] = 5\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 74, | |
"text": [ | |
"a 0\n", | |
"b 5\n", | |
"c 5\n", | |
"d 3\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 74 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = DataFrame(np.arange(16).reshape((4, 4)),\n", | |
" index=['Ohio', 'Colorado', 'Utah', 'New York'],\n", | |
" columns=['one', 'two', 'three', 'four'])\n", | |
"data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" <th>three</th>\n", | |
" <th>four</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Colorado</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 8</td>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td> 12</td>\n", | |
" <td> 13</td>\n", | |
" <td> 14</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 91, | |
"text": [ | |
" one two three four\n", | |
"Ohio 0 1 2 3\n", | |
"Colorado 4 5 6 7\n", | |
"Utah 8 9 10 11\n", | |
"New York 12 13 14 15" | |
] | |
} | |
], | |
"prompt_number": 91 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print data[:2]\n", | |
"print \n", | |
"print data[data['three'] > 5]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" one two three four\n", | |
"Ohio 0 1 2 3\n", | |
"Colorado 4 5 6 7\n", | |
"\n", | |
" one two three four\n", | |
"Colorado 4 5 6 7\n", | |
"Utah 8 9 10 11\n", | |
"New York 12 13 14 15\n" | |
] | |
} | |
], | |
"prompt_number": 92 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data < 5" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" <th>three</th>\n", | |
" <th>four</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> True</td>\n", | |
" <td> True</td>\n", | |
" <td> True</td>\n", | |
" <td> True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Colorado</th>\n", | |
" <td> True</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" <td> False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 238, | |
"text": [ | |
" one two three four\n", | |
"Ohio True True True True\n", | |
"Colorado True False False False\n", | |
"Utah False False False False\n", | |
"New York False False False False" | |
] | |
} | |
], | |
"prompt_number": 238 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data[data < 5] = 0\n", | |
"data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" <th>three</th>\n", | |
" <th>four</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Colorado</th>\n", | |
" <td> 0</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 8</td>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>New York</th>\n", | |
" <td> 12</td>\n", | |
" <td> 13</td>\n", | |
" <td> 14</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 94, | |
"text": [ | |
" one two three four\n", | |
"Ohio 0 0 0 0\n", | |
"Colorado 0 5 6 7\n", | |
"Utah 8 9 10 11\n", | |
"New York 12 13 14 15" | |
] | |
} | |
], | |
"prompt_number": 94 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.ix['Colorado', ['two', 'three']]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 240, | |
"text": [ | |
"two 5\n", | |
"three 6\n", | |
"Name: Colorado, dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 240 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.ix[['Colorado', 'Utah'], [3, 0, 1]]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>four</th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Colorado</th>\n", | |
" <td> 7</td>\n", | |
" <td> 0</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 11</td>\n", | |
" <td> 8</td>\n", | |
" <td> 9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 96, | |
"text": [ | |
" four one two\n", | |
"Colorado 7 0 5\n", | |
"Utah 11 8 9" | |
] | |
} | |
], | |
"prompt_number": 96 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print data.ix[2]\n", | |
"print \n", | |
"print data.ix[:'Utah', 'two']\n", | |
"print \n", | |
"print data.ix[data.three > 5, :3]\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"one 8\n", | |
"two 9\n", | |
"three 10\n", | |
"four 11\n", | |
"Name: Utah, dtype: int64\n", | |
"\n", | |
"Ohio 0\n", | |
"Colorado 5\n", | |
"Utah 9\n", | |
"Name: two, dtype: int64\n", | |
"\n", | |
" one two three\n", | |
"Colorado 0 5 6\n", | |
"Utah 8 9 10\n", | |
"New York 12 13 14\n" | |
] | |
} | |
], | |
"prompt_number": 97 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Arithmetic and data alignment\n", | |
"\n", | |
"One of the most important pandas features is the behavior of arithmetic between objects with different indexes. When adding together objects, if any index pairs are not the same, the respective index in the result will be the union of the index pairs." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"s1 = Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])\n", | |
"s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])\n", | |
"s1+s2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 241, | |
"text": [ | |
"a 5.2\n", | |
"c 1.1\n", | |
"d NaN\n", | |
"e 0.0\n", | |
"f NaN\n", | |
"g NaN\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 241 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Arithmetic methods with fill values\n", | |
"\n", | |
"df1 = DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'), index=['Ohio', 'Texas', 'Colorado'])\n", | |
"df2 = DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])\n", | |
"print df1\n", | |
"print \n", | |
"print df2\n", | |
"print \n", | |
"print df1+df2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" b c d\n", | |
"Ohio 0 1 2\n", | |
"Texas 3 4 5\n", | |
"Colorado 6 7 8\n", | |
"\n", | |
" b d e\n", | |
"Utah 0 1 2\n", | |
"Ohio 3 4 5\n", | |
"Texas 6 7 8\n", | |
"Oregon 9 10 11\n", | |
"\n", | |
" b c d e\n", | |
"Colorado NaN NaN NaN NaN\n", | |
"Ohio 3 NaN 6 NaN\n", | |
"Oregon NaN NaN NaN NaN\n", | |
"Texas 9 NaN 12 NaN\n", | |
"Utah NaN NaN NaN NaN\n" | |
] | |
} | |
], | |
"prompt_number": 242 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1 = DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd')) \n", | |
"df2 = DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))\n", | |
"print df1\n", | |
"print \n", | |
"print df2\n", | |
"print \n", | |
"print df1 + df2 " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" a b c d\n", | |
"0 0 1 2 3\n", | |
"1 4 5 6 7\n", | |
"2 8 9 10 11\n", | |
"\n", | |
" a b c d e\n", | |
"0 0 1 2 3 4\n", | |
"1 5 6 7 8 9\n", | |
"2 10 11 12 13 14\n", | |
"3 15 16 17 18 19\n", | |
"\n", | |
" a b c d e\n", | |
"0 0 2 4 6 NaN\n", | |
"1 9 11 13 15 NaN\n", | |
"2 18 20 22 24 NaN\n", | |
"3 NaN NaN NaN NaN NaN\n" | |
] | |
} | |
], | |
"prompt_number": 243 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.add(df2, fill_value=0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 2</td>\n", | |
" <td> 4</td>\n", | |
" <td> 6</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 9</td>\n", | |
" <td> 11</td>\n", | |
" <td> 13</td>\n", | |
" <td> 15</td>\n", | |
" <td> 9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 18</td>\n", | |
" <td> 20</td>\n", | |
" <td> 22</td>\n", | |
" <td> 24</td>\n", | |
" <td> 14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 15</td>\n", | |
" <td> 16</td>\n", | |
" <td> 17</td>\n", | |
" <td> 18</td>\n", | |
" <td> 19</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 104, | |
"text": [ | |
" a b c d e\n", | |
"0 0 2 4 6 4\n", | |
"1 9 11 13 15 9\n", | |
"2 18 20 22 24 14\n", | |
"3 15 16 17 18 19" | |
] | |
} | |
], | |
"prompt_number": 104 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.reindex(columns=df2.columns, fill_value=0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 8</td>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 105, | |
"text": [ | |
" a b c d e\n", | |
"0 0 1 2 3 0\n", | |
"1 4 5 6 7 0\n", | |
"2 8 9 10 11 0" | |
] | |
} | |
], | |
"prompt_number": 105 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Operations between DataFrame and Series\n", | |
"\n", | |
"arr = np.arange(12.).reshape((3, 4))\n", | |
"print arr\n", | |
"print \n", | |
"print arr[0]\n", | |
"print \n", | |
"print arr - arr[0]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[[ 0. 1. 2. 3.]\n", | |
" [ 4. 5. 6. 7.]\n", | |
" [ 8. 9. 10. 11.]]\n", | |
"\n", | |
"[ 0. 1. 2. 3.]\n", | |
"\n", | |
"[[ 0. 0. 0. 0.]\n", | |
" [ 4. 4. 4. 4.]\n", | |
" [ 8. 8. 8. 8.]]\n" | |
] | |
} | |
], | |
"prompt_number": 108 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),\n", | |
" index=['Utah', 'Ohio', 'Texas', 'Oregon'])\n", | |
"\n", | |
"series = frame.ix[0]\n", | |
"\n", | |
"print frame\n", | |
"print \n", | |
"print series \n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" b d e\n", | |
"Utah 0 1 2\n", | |
"Ohio 3 4 5\n", | |
"Texas 6 7 8\n", | |
"Oregon 9 10 11\n", | |
"\n", | |
"b 0\n", | |
"d 1\n", | |
"e 2\n", | |
"Name: Utah, dtype: float64\n" | |
] | |
} | |
], | |
"prompt_number": 111 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame - series" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 3</td>\n", | |
" <td> 3</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td> 6</td>\n", | |
" <td> 6</td>\n", | |
" <td> 6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td> 9</td>\n", | |
" <td> 9</td>\n", | |
" <td> 9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 112, | |
"text": [ | |
" b d e\n", | |
"Utah 0 0 0\n", | |
"Ohio 3 3 3\n", | |
"Texas 6 6 6\n", | |
"Oregon 9 9 9" | |
] | |
} | |
], | |
"prompt_number": 112 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"series2 = Series(range(3), index=['b', 'e', 'f'])\n", | |
"frame + series2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" <th>f</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 0</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 3</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 3</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 6</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td> 6</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 9</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td> 9</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 12</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 113, | |
"text": [ | |
" b d e f\n", | |
"Utah 0 NaN 3 NaN\n", | |
"Ohio 3 NaN 6 NaN\n", | |
"Texas 6 NaN 9 NaN\n", | |
"Oregon 9 NaN 12 NaN" | |
] | |
} | |
], | |
"prompt_number": 113 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"series3 = frame['d']\n", | |
"series3" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 114, | |
"text": [ | |
"Utah 1\n", | |
"Ohio 4\n", | |
"Texas 7\n", | |
"Oregon 10\n", | |
"Name: d, dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 114 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sub(series3, axis=0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td>-1</td>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td>-1</td>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td>-1</td>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td>-1</td>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 115, | |
"text": [ | |
" b d e\n", | |
"Utah -1 0 1\n", | |
"Ohio -1 0 1\n", | |
"Texas -1 0 1\n", | |
"Oregon -1 0 1" | |
] | |
} | |
], | |
"prompt_number": 115 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Function application and mapping\n", | |
"\n", | |
"NumPy ufuncs (element-wise array methods) work fine with pandas objects:\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.random.randn(4, 3), columns=list('bde'),\n", | |
" index=['Utah', 'Ohio', 'Texas', 'Oregon'])\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 0.531372</td>\n", | |
" <td> 0.016855</td>\n", | |
" <td> 0.006387</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0.253249</td>\n", | |
" <td> 0.679284</td>\n", | |
" <td> 1.249851</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td> 1.393670</td>\n", | |
" <td> 0.544011</td>\n", | |
" <td>-0.598251</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td>-1.405705</td>\n", | |
" <td> 0.481606</td>\n", | |
" <td> 0.755763</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 116, | |
"text": [ | |
" b d e\n", | |
"Utah 0.531372 0.016855 0.006387\n", | |
"Ohio 0.253249 0.679284 1.249851\n", | |
"Texas 1.393670 0.544011 -0.598251\n", | |
"Oregon -1.405705 0.481606 0.755763" | |
] | |
} | |
], | |
"prompt_number": 116 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"np.abs(frame)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 0.531372</td>\n", | |
" <td> 0.016855</td>\n", | |
" <td> 0.006387</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0.253249</td>\n", | |
" <td> 0.679284</td>\n", | |
" <td> 1.249851</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td> 1.393670</td>\n", | |
" <td> 0.544011</td>\n", | |
" <td> 0.598251</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td> 1.405705</td>\n", | |
" <td> 0.481606</td>\n", | |
" <td> 0.755763</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 117, | |
"text": [ | |
" b d e\n", | |
"Utah 0.531372 0.016855 0.006387\n", | |
"Ohio 0.253249 0.679284 1.249851\n", | |
"Texas 1.393670 0.544011 0.598251\n", | |
"Oregon 1.405705 0.481606 0.755763" | |
] | |
} | |
], | |
"prompt_number": 117 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"f = lambda x: x.max() - x.min()\n", | |
"\n", | |
"frame.apply(f)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 118, | |
"text": [ | |
"b 2.799376\n", | |
"d 0.662429\n", | |
"e 1.848102\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 118 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.apply(f, axis=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 119, | |
"text": [ | |
"Utah 0.524986\n", | |
"Ohio 0.996602\n", | |
"Texas 1.991921\n", | |
"Oregon 2.161469\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 119 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def f(x):\n", | |
" return Series([x.min(), x.max()], index=['min', 'max'])\n", | |
"frame.apply(f)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>-1.405705</td>\n", | |
" <td> 0.016855</td>\n", | |
" <td>-0.598251</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 1.393670</td>\n", | |
" <td> 0.679284</td>\n", | |
" <td> 1.249851</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 124, | |
"text": [ | |
" b d e\n", | |
"min -1.405705 0.016855 -0.598251\n", | |
"max 1.393670 0.679284 1.249851" | |
] | |
} | |
], | |
"prompt_number": 124 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"format = lambda x: '%.2f' % x\n", | |
"frame.applymap(format)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>b</th>\n", | |
" <th>d</th>\n", | |
" <th>e</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Utah</th>\n", | |
" <td> 0.53</td>\n", | |
" <td> 0.02</td>\n", | |
" <td> 0.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Ohio</th>\n", | |
" <td> 0.25</td>\n", | |
" <td> 0.68</td>\n", | |
" <td> 1.25</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Texas</th>\n", | |
" <td> 1.39</td>\n", | |
" <td> 0.54</td>\n", | |
" <td> -0.60</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Oregon</th>\n", | |
" <td> -1.41</td>\n", | |
" <td> 0.48</td>\n", | |
" <td> 0.76</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 125, | |
"text": [ | |
" b d e\n", | |
"Utah 0.53 0.02 0.01\n", | |
"Ohio 0.25 0.68 1.25\n", | |
"Texas 1.39 0.54 -0.60\n", | |
"Oregon -1.41 0.48 0.76" | |
] | |
} | |
], | |
"prompt_number": 125 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame['e'].map(format)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 126, | |
"text": [ | |
"Utah 0.01\n", | |
"Ohio 1.25\n", | |
"Texas -0.60\n", | |
"Oregon 0.76\n", | |
"Name: e, dtype: object" | |
] | |
} | |
], | |
"prompt_number": 126 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Sorting and ranking \n", | |
"\n", | |
"Sorting a data set by some criterion is another important built-in operation. To sort lexicographically by row or column index, use the sort_index method, which returns a new, sorted object:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(range(4), index=['d', 'a', 'b', 'c'])\n", | |
"obj.sort_index()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 127, | |
"text": [ | |
"a 1\n", | |
"b 2\n", | |
"c 3\n", | |
"d 0\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 127 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.arange(8).reshape((2, 4)), \n", | |
" index=['three', 'one'], \n", | |
" columns=['d', 'a', 'b', 'c'])\n", | |
"\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>d</th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 137, | |
"text": [ | |
" d a b c\n", | |
"three 0 1 2 3\n", | |
"one 4 5 6 7" | |
] | |
} | |
], | |
"prompt_number": 137 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sort_index()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>d</th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 138, | |
"text": [ | |
" d a b c\n", | |
"one 4 5 6 7\n", | |
"three 0 1 2 3" | |
] | |
} | |
], | |
"prompt_number": 138 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sort_index(axis=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 139, | |
"text": [ | |
" a b c d\n", | |
"three 1 2 3 0\n", | |
"one 5 6 7 4" | |
] | |
} | |
], | |
"prompt_number": 139 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sort_index(axis=0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>d</th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 140, | |
"text": [ | |
" d a b c\n", | |
"one 4 5 6 7\n", | |
"three 0 1 2 3" | |
] | |
} | |
], | |
"prompt_number": 140 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sort_index(axis=2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "No axis named 2", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-141-c7a53c74c9ed>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mframe\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/Library/Python/2.7/site-packages/pandas/core/frame.pyc\u001b[0m in \u001b[0;36msort_index\u001b[0;34m(self, axis, by, ascending, inplace)\u001b[0m\n\u001b[1;32m 3126\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0m_lexsort_indexer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3127\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3128\u001b[0;31m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis_number\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3129\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3130\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Axis must be 0 or 1, got %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Python/2.7/site-packages/pandas/core/generic.pyc\u001b[0m in \u001b[0;36m_get_axis_number\u001b[0;34m(self, axis)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'No axis named %s'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_axis_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: No axis named 2" | |
] | |
} | |
], | |
"prompt_number": 141 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sort_index(axis=1, ascending=False)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>d</th>\n", | |
" <th>c</th>\n", | |
" <th>b</th>\n", | |
" <th>a</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>three</th>\n", | |
" <td> 0</td>\n", | |
" <td> 3</td>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>one</th>\n", | |
" <td> 4</td>\n", | |
" <td> 7</td>\n", | |
" <td> 6</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 142, | |
"text": [ | |
" d c b a\n", | |
"three 0 3 2 1\n", | |
"one 4 7 6 5" | |
] | |
} | |
], | |
"prompt_number": 142 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series([4, 7, -3, 2])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 143, | |
"text": [ | |
"0 4\n", | |
"1 7\n", | |
"2 -3\n", | |
"3 2\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 143 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# To sort a Series by its values, use its order method:\n", | |
"obj.order()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 144, | |
"text": [ | |
"2 -3\n", | |
"3 2\n", | |
"0 4\n", | |
"1 7\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 144 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Any missing values are sorted to the end of the Series by default:\n", | |
"obj = Series([4, np.nan, 7, np.nan, -3, 2])\n", | |
"obj.order()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 145, | |
"text": [ | |
"4 -3\n", | |
"5 2\n", | |
"0 4\n", | |
"2 7\n", | |
"1 NaN\n", | |
"3 NaN\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 145 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# you may want to sort by the values in one or more columns. \n", | |
"frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})\n", | |
"\n", | |
"print frame.sort_index(ascending=False)\n", | |
"print \n", | |
"print frame.sort_index(by='b')\n", | |
"print \n", | |
"print frame.sort_index(by=['a','b'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" a b\n", | |
"3 1 2\n", | |
"2 0 -3\n", | |
"1 1 7\n", | |
"0 0 4\n", | |
"\n", | |
" a b\n", | |
"2 0 -3\n", | |
"3 1 2\n", | |
"0 0 4\n", | |
"1 1 7\n", | |
"\n", | |
" a b\n", | |
"2 0 -3\n", | |
"0 0 4\n", | |
"3 1 2\n", | |
"1 1 7\n" | |
] | |
} | |
], | |
"prompt_number": 149 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"__Ranking__ is closely related to sorting, assigning ranks from one through the number of valid data points in an array. It is similar to the indirect sort indices produced by __numpy.argsort__, except that ties are broken according to a rule. The __rank__ methods for Series and DataFrame are the place to look; by default __rank__ breaks ties by assigning each group the mean rank:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series([7, -5, 7, 4, 2, 0, 4])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 150, | |
"text": [ | |
"0 7\n", | |
"1 -5\n", | |
"2 7\n", | |
"3 4\n", | |
"4 2\n", | |
"5 0\n", | |
"6 4\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 150 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.rank()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 151, | |
"text": [ | |
"0 6.5\n", | |
"1 1.0\n", | |
"2 6.5\n", | |
"3 4.5\n", | |
"4 3.0\n", | |
"5 2.0\n", | |
"6 4.5\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 151 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.order()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 152, | |
"text": [ | |
"1 -5\n", | |
"5 0\n", | |
"4 2\n", | |
"3 4\n", | |
"6 4\n", | |
"0 7\n", | |
"2 7\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 152 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.rank(method='first')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 153, | |
"text": [ | |
"0 6\n", | |
"1 1\n", | |
"2 7\n", | |
"3 4\n", | |
"4 3\n", | |
"5 2\n", | |
"6 5\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 153 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.rank(ascending=False, method='max')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 157, | |
"text": [ | |
"0 2\n", | |
"1 7\n", | |
"2 2\n", | |
"3 4\n", | |
"4 5\n", | |
"5 6\n", | |
"6 4\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 157 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame({'b': [4.3, 7, -3, 2], 'a': [0, 1, 0, 1], \n", | |
" 'c': [-2, 5, 8, -2.5]})\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 4.3</td>\n", | |
" <td>-2.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 7.0</td>\n", | |
" <td> 5.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 0</td>\n", | |
" <td>-3.0</td>\n", | |
" <td> 8.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 1</td>\n", | |
" <td> 2.0</td>\n", | |
" <td>-2.5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 158, | |
"text": [ | |
" a b c\n", | |
"0 0 4.3 -2.0\n", | |
"1 1 7.0 5.0\n", | |
"2 0 -3.0 8.0\n", | |
"3 1 2.0 -2.5" | |
] | |
} | |
], | |
"prompt_number": 158 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.rank(axis=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 3</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 159, | |
"text": [ | |
" a b c\n", | |
"0 2 3 1\n", | |
"1 1 3 2\n", | |
"2 2 1 3\n", | |
"3 2 3 1" | |
] | |
} | |
], | |
"prompt_number": 159 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Axis indexes with duplicate values\n", | |
"\n", | |
"Up until now all of the examples I\u2019ve showed you have had unique axis labels (index values). While many pandas functions (like reindex) require that the labels be unique, it\u2019s not mandatory. Let\u2019s consider a small Series with duplicate indices:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(range(5), index=['a', 'a', 'b', 'b', 'c'])\n", | |
"obj" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 160, | |
"text": [ | |
"a 0\n", | |
"a 1\n", | |
"b 2\n", | |
"b 3\n", | |
"c 4\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 160 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.index.is_unique" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 161, | |
"text": [ | |
"False" | |
] | |
} | |
], | |
"prompt_number": 161 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj['a']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 162, | |
"text": [ | |
"a 0\n", | |
"a 1\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 162 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 0.659189</td>\n", | |
" <td> 2.247419</td>\n", | |
" <td> 0.317194</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td>-0.812310</td>\n", | |
" <td>-0.311062</td>\n", | |
" <td> 1.263877</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>-0.078266</td>\n", | |
" <td>-0.226042</td>\n", | |
" <td>-0.265894</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>-0.972430</td>\n", | |
" <td> 0.833500</td>\n", | |
" <td>-0.604137</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 163, | |
"text": [ | |
" 0 1 2\n", | |
"a 0.659189 2.247419 0.317194\n", | |
"a -0.812310 -0.311062 1.263877\n", | |
"b -0.078266 -0.226042 -0.265894\n", | |
"b -0.972430 0.833500 -0.604137" | |
] | |
} | |
], | |
"prompt_number": 163 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.ix['b']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>-0.078266</td>\n", | |
" <td>-0.226042</td>\n", | |
" <td>-0.265894</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td>-0.972430</td>\n", | |
" <td> 0.833500</td>\n", | |
" <td>-0.604137</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 164, | |
"text": [ | |
" 0 1 2\n", | |
"b -0.078266 -0.226042 -0.265894\n", | |
"b -0.972430 0.833500 -0.604137" | |
] | |
} | |
], | |
"prompt_number": 164 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Summarizing and Computing Descriptive Statistics \n", | |
"\n", | |
"pandas objects are equipped with a set of common mathematical and statistical meth- ods. Most of these fall into the category of reductions or summary statistics, methods that extract a single value (like the sum or mean) from a Series or a Series of values from the rows or columns of a DataFrame. Compared with the equivalent methods of vanilla NumPy arrays, they are all built from the ground up to exclude missing data. Consider a small DataFrame:\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],\n", | |
" index=['a', 'b', 'c', 'd'], columns=['one', 'two'])\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 1.40</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 7.10</td>\n", | |
" <td>-4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 0.75</td>\n", | |
" <td>-1.3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
" one two\n", | |
"a 1.40 NaN\n", | |
"b 7.10 -4.5\n", | |
"c NaN NaN\n", | |
"d 0.75 -1.3" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.sum()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"one 9.25\n", | |
"two -5.80\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.sum(axis=1)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
"a 1.40\n", | |
"b 2.60\n", | |
"c NaN\n", | |
"d -0.55\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.sum(axis=1, skipna=False)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
"a NaN\n", | |
"b 2.60\n", | |
"c NaN\n", | |
"d -0.55\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.cumsum()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td> 1.40</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 8.50</td>\n", | |
" <td>-4.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> 9.25</td>\n", | |
" <td>-5.8</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
" one two\n", | |
"a 1.40 NaN\n", | |
"b 8.50 -4.5\n", | |
"c NaN NaN\n", | |
"d 9.25 -5.8" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.describe()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>one</th>\n", | |
" <th>two</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td> 3.000000</td>\n", | |
" <td> 2.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 3.083333</td>\n", | |
" <td>-2.900000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 3.493685</td>\n", | |
" <td> 2.262742</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 0.750000</td>\n", | |
" <td>-4.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 1.075000</td>\n", | |
" <td>-3.700000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 1.400000</td>\n", | |
" <td>-2.900000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 4.250000</td>\n", | |
" <td>-2.100000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 7.100000</td>\n", | |
" <td>-1.300000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
" one two\n", | |
"count 3.000000 2.000000\n", | |
"mean 3.083333 -2.900000\n", | |
"std 3.493685 2.262742\n", | |
"min 0.750000 -4.500000\n", | |
"25% 1.075000 -3.700000\n", | |
"50% 1.400000 -2.900000\n", | |
"75% 4.250000 -2.100000\n", | |
"max 7.100000 -1.300000" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Correlation and Covariance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas.io.data as web\n", | |
"all_data = {}\n", | |
"for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:\n", | |
" all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')\n", | |
"\n", | |
"price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.iteritems()})\n", | |
"volume = DataFrame({tic: data['Volume'] for tic, data in all_data.iteritems()})\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"price" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<pre>\n", | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 2515 entries, 2000-01-03 00:00:00 to 2009-12-31 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 2515 non-null values\n", | |
"GOOG 1353 non-null values\n", | |
"IBM 2515 non-null values\n", | |
"MSFT 2515 non-null values\n", | |
"dtypes: float64(4)\n", | |
"</pre>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 16, | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 2515 entries, 2000-01-03 00:00:00 to 2009-12-31 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 2515 non-null values\n", | |
"GOOG 1353 non-null values\n", | |
"IBM 2515 non-null values\n", | |
"MSFT 2515 non-null values\n", | |
"dtypes: float64(4)" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"volume" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<pre>\n", | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 2515 entries, 2000-01-03 00:00:00 to 2009-12-31 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 2515 non-null values\n", | |
"GOOG 1353 non-null values\n", | |
"IBM 2515 non-null values\n", | |
"MSFT 2515 non-null values\n", | |
"dtypes: float64(1), int64(3)\n", | |
"</pre>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 18, | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 2515 entries, 2000-01-03 00:00:00 to 2009-12-31 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 2515 non-null values\n", | |
"GOOG 1353 non-null values\n", | |
"IBM 2515 non-null values\n", | |
"MSFT 2515 non-null values\n", | |
"dtypes: float64(1), int64(3)" | |
] | |
} | |
], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns = price.pct_change()\n", | |
"returns.tail()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>AAPL</th>\n", | |
" <th>GOOG</th>\n", | |
" <th>IBM</th>\n", | |
" <th>MSFT</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2009-12-24</th>\n", | |
" <td> 0.034342</td>\n", | |
" <td> 0.011117</td>\n", | |
" <td> 0.004439</td>\n", | |
" <td> 0.002495</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2009-12-28</th>\n", | |
" <td> 0.012297</td>\n", | |
" <td> 0.007098</td>\n", | |
" <td> 0.013257</td>\n", | |
" <td> 0.005688</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2009-12-29</th>\n", | |
" <td>-0.011856</td>\n", | |
" <td>-0.005571</td>\n", | |
" <td>-0.003473</td>\n", | |
" <td> 0.007070</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2009-12-30</th>\n", | |
" <td> 0.012146</td>\n", | |
" <td> 0.005376</td>\n", | |
" <td> 0.005511</td>\n", | |
" <td>-0.013689</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2009-12-31</th>\n", | |
" <td>-0.004275</td>\n", | |
" <td>-0.004416</td>\n", | |
" <td>-0.012574</td>\n", | |
" <td>-0.015658</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 20, | |
"text": [ | |
" AAPL GOOG IBM MSFT\n", | |
"Date \n", | |
"2009-12-24 0.034342 0.011117 0.004439 0.002495\n", | |
"2009-12-28 0.012297 0.007098 0.013257 0.005688\n", | |
"2009-12-29 -0.011856 -0.005571 -0.003473 0.007070\n", | |
"2009-12-30 0.012146 0.005376 0.005511 -0.013689\n", | |
"2009-12-31 -0.004275 -0.004416 -0.012574 -0.015658" | |
] | |
} | |
], | |
"prompt_number": 20 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns.MSFT.corr(returns.IBM)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 21, | |
"text": [ | |
"0.49607155409807629" | |
] | |
} | |
], | |
"prompt_number": 21 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns.MSFT.cov(returns.IBM)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 22, | |
"text": [ | |
"0.00021598877352704989" | |
] | |
} | |
], | |
"prompt_number": 22 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns.corr()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>AAPL</th>\n", | |
" <th>GOOG</th>\n", | |
" <th>IBM</th>\n", | |
" <th>MSFT</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>AAPL</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.470740</td>\n", | |
" <td> 0.409959</td>\n", | |
" <td> 0.424209</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 0.470740</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.390578</td>\n", | |
" <td> 0.443412</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IBM</th>\n", | |
" <td> 0.409959</td>\n", | |
" <td> 0.390578</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.496072</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 0.424209</td>\n", | |
" <td> 0.443412</td>\n", | |
" <td> 0.496072</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 23, | |
"text": [ | |
" AAPL GOOG IBM MSFT\n", | |
"AAPL 1.000000 0.470740 0.409959 0.424209\n", | |
"GOOG 0.470740 1.000000 0.390578 0.443412\n", | |
"IBM 0.409959 0.390578 1.000000 0.496072\n", | |
"MSFT 0.424209 0.443412 0.496072 1.000000" | |
] | |
} | |
], | |
"prompt_number": 23 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns.cov()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>AAPL</th>\n", | |
" <th>GOOG</th>\n", | |
" <th>IBM</th>\n", | |
" <th>MSFT</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>AAPL</th>\n", | |
" <td> 0.001028</td>\n", | |
" <td> 0.000303</td>\n", | |
" <td> 0.000252</td>\n", | |
" <td> 0.000309</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 0.000303</td>\n", | |
" <td> 0.000580</td>\n", | |
" <td> 0.000142</td>\n", | |
" <td> 0.000205</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IBM</th>\n", | |
" <td> 0.000252</td>\n", | |
" <td> 0.000142</td>\n", | |
" <td> 0.000367</td>\n", | |
" <td> 0.000216</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 0.000309</td>\n", | |
" <td> 0.000205</td>\n", | |
" <td> 0.000216</td>\n", | |
" <td> 0.000516</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 24, | |
"text": [ | |
" AAPL GOOG IBM MSFT\n", | |
"AAPL 0.001028 0.000303 0.000252 0.000309\n", | |
"GOOG 0.000303 0.000580 0.000142 0.000205\n", | |
"IBM 0.000252 0.000142 0.000367 0.000216\n", | |
"MSFT 0.000309 0.000205 0.000216 0.000516" | |
] | |
} | |
], | |
"prompt_number": 24 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"returns.corrwith(returns.IBM)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 25, | |
"text": [ | |
"AAPL 0.409959\n", | |
"GOOG 0.390578\n", | |
"IBM 1.000000\n", | |
"MSFT 0.496072\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 25 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Unique Values, Value Counts, and Membership" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])\n", | |
"uniques = obj.unique()\n", | |
"uniques " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 26, | |
"text": [ | |
"array([c, a, d, b], dtype=object)" | |
] | |
} | |
], | |
"prompt_number": 26 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"obj.value_counts()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 27, | |
"text": [ | |
"c 3\n", | |
"a 3\n", | |
"b 2\n", | |
"d 1\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 27 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pd.value_counts(obj.values, sort=False)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 28, | |
"text": [ | |
"a 3\n", | |
"c 3\n", | |
"b 2\n", | |
"d 1\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 28 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pd.value_counts(obj.values, sort=True)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 29, | |
"text": [ | |
"c 3\n", | |
"a 3\n", | |
"b 2\n", | |
"d 1\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 29 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"mask = obj.isin(['b', 'c'])\n", | |
"mask" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 30, | |
"text": [ | |
"0 True\n", | |
"1 False\n", | |
"2 False\n", | |
"3 False\n", | |
"4 False\n", | |
"5 True\n", | |
"6 True\n", | |
"7 True\n", | |
"8 True\n", | |
"dtype: bool" | |
] | |
} | |
], | |
"prompt_number": 30 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = DataFrame({'Qu1': [1, 3, 4, 3, 4], \n", | |
" 'Qu2': [2, 3, 1, 2, 3],\n", | |
" 'Qu3': [1, 5, 2, 4, 4]})\n", | |
"data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Qu1</th>\n", | |
" <th>Qu2</th>\n", | |
" <th>Qu3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 3</td>\n", | |
" <td> 3</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 4</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 3</td>\n", | |
" <td> 2</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 4</td>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 31, | |
"text": [ | |
" Qu1 Qu2 Qu3\n", | |
"0 1 2 1\n", | |
"1 3 3 5\n", | |
"2 4 1 2\n", | |
"3 3 2 4\n", | |
"4 4 3 4" | |
] | |
} | |
], | |
"prompt_number": 31 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"result = data.apply(pd.value_counts)\n", | |
"result" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Qu1</th>\n", | |
" <th>Qu2</th>\n", | |
" <th>Qu3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 1</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2</td>\n", | |
" <td> 2</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 2</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 33, | |
"text": [ | |
" Qu1 Qu2 Qu3\n", | |
"1 1 1 1\n", | |
"2 NaN 2 1\n", | |
"3 2 2 NaN\n", | |
"4 2 NaN 2\n", | |
"5 NaN NaN 1" | |
] | |
} | |
], | |
"prompt_number": 33 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"result = data.apply(pd.value_counts).fillna(0)\n", | |
"result" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Qu1</th>\n", | |
" <th>Qu2</th>\n", | |
" <th>Qu3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 1</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 0</td>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2</td>\n", | |
" <td> 2</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 2</td>\n", | |
" <td> 0</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 35, | |
"text": [ | |
" Qu1 Qu2 Qu3\n", | |
"1 1 1 1\n", | |
"2 0 2 1\n", | |
"3 2 2 0\n", | |
"4 2 0 2\n", | |
"5 0 0 1" | |
] | |
} | |
], | |
"prompt_number": 35 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Handling Missing Data\n", | |
"\n", | |
"Missing data is common in most data analysis applications. One of the goals in de- signing pandas was to make working with missing data as painless as possible. For example, all of the descriptive statistics on pandas objects exclude missing data as you\u2019ve seen earlier in the chapter." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])\n", | |
"string_data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 36, | |
"text": [ | |
"0 aardvark\n", | |
"1 artichoke\n", | |
"2 NaN\n", | |
"3 avocado\n", | |
"dtype: object" | |
] | |
} | |
], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"string_data.isnull()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 37, | |
"text": [ | |
"0 False\n", | |
"1 False\n", | |
"2 True\n", | |
"3 False\n", | |
"dtype: bool" | |
] | |
} | |
], | |
"prompt_number": 37 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"string_data[0] = None\n", | |
"string_data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 39, | |
"text": [ | |
"0 None\n", | |
"1 artichoke\n", | |
"2 NaN\n", | |
"3 avocado\n", | |
"dtype: object" | |
] | |
} | |
], | |
"prompt_number": 39 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Filtering Out Missing Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from numpy import nan as NA\n", | |
"data = Series([1,NA,3.5,NA,7])\n", | |
"data.dropna()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 41, | |
"text": [ | |
"0 1.0\n", | |
"2 3.5\n", | |
"4 7.0\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 41 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data[data.notnull()]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 42, | |
"text": [ | |
"0 1.0\n", | |
"2 3.5\n", | |
"4 7.0\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 42 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = DataFrame([[1., 6.5, 3.], [1., NA, NA], [NA, NA, NA], [NA, 6.5, 3.]])\n", | |
"cleaned = data.dropna()\n", | |
"cleaned" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 1</td>\n", | |
" <td> 6.5</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 44, | |
"text": [ | |
" 0 1 2\n", | |
"0 1 6.5 3" | |
] | |
} | |
], | |
"prompt_number": 44 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.dropna(how='all')\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 1</td>\n", | |
" <td> 6.5</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NaN</td>\n", | |
" <td> 6.5</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 46, | |
"text": [ | |
" 0 1 2\n", | |
"0 1 6.5 3\n", | |
"1 1 NaN NaN\n", | |
"3 NaN 6.5 3" | |
] | |
} | |
], | |
"prompt_number": 46 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = DataFrame(np.random.randn(7,3))\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.404052</td>\n", | |
" <td> 0.409225</td>\n", | |
" <td> 0.978779</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.494410</td>\n", | |
" <td>-0.398292</td>\n", | |
" <td>-0.958605</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.197636</td>\n", | |
" <td> 1.250569</td>\n", | |
" <td> 0.626912</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.806719</td>\n", | |
" <td>-2.660904</td>\n", | |
" <td> 0.142528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.528672</td>\n", | |
" <td>-2.018280</td>\n", | |
" <td>-1.702336</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 48, | |
"text": [ | |
" 0 1 2\n", | |
"0 0.404052 0.409225 0.978779\n", | |
"1 -0.494410 -0.398292 -0.958605\n", | |
"2 -0.197636 1.250569 0.626912\n", | |
"3 0.806719 -2.660904 0.142528\n", | |
"4 -0.528672 -2.018280 -1.702336\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 48 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.ix[:4,1]=NA\n", | |
"df.ix[:2,2]=NA\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.404052</td>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.494410</td>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.197636</td>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.806719</td>\n", | |
" <td> NaN</td>\n", | |
" <td> 0.142528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.528672</td>\n", | |
" <td> NaN</td>\n", | |
" <td>-1.702336</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 49, | |
"text": [ | |
" 0 1 2\n", | |
"0 0.404052 NaN NaN\n", | |
"1 -0.494410 NaN NaN\n", | |
"2 -0.197636 NaN NaN\n", | |
"3 0.806719 NaN 0.142528\n", | |
"4 -0.528672 NaN -1.702336\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 49 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# A related way to filter out DataFrame rows tends to concern time series data.\n", | |
"# Suppose you want to keep only rows containing a certain number of observations. \n", | |
"# You can indicate this with the thresh argument:\n", | |
"\n", | |
"df.dropna(thresh=3)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 56, | |
"text": [ | |
" 0 1 2\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 56 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Filling in Missing Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.fillna(0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.404052</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.494410</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.197636</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.806719</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.142528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.528672</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td>-1.702336</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 57, | |
"text": [ | |
" 0 1 2\n", | |
"0 0.404052 0.000000 0.000000\n", | |
"1 -0.494410 0.000000 0.000000\n", | |
"2 -0.197636 0.000000 0.000000\n", | |
"3 0.806719 0.000000 0.142528\n", | |
"4 -0.528672 0.000000 -1.702336\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 57 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.fillna({1:0.5, 3:-1, 2:0.1})" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.404052</td>\n", | |
" <td> 0.500000</td>\n", | |
" <td> 0.100000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.494410</td>\n", | |
" <td> 0.500000</td>\n", | |
" <td> 0.100000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.197636</td>\n", | |
" <td> 0.500000</td>\n", | |
" <td> 0.100000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.806719</td>\n", | |
" <td> 0.500000</td>\n", | |
" <td> 0.142528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.528672</td>\n", | |
" <td> 0.500000</td>\n", | |
" <td>-1.702336</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 58, | |
"text": [ | |
" 0 1 2\n", | |
"0 0.404052 0.500000 0.100000\n", | |
"1 -0.494410 0.500000 0.100000\n", | |
"2 -0.197636 0.500000 0.100000\n", | |
"3 0.806719 0.500000 0.142528\n", | |
"4 -0.528672 0.500000 -1.702336\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 58 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"_ = df.fillna(0, inplace=True)\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.404052</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.494410</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>-0.197636</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.806719</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.142528</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.528672</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td>-1.702336</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>-0.594223</td>\n", | |
" <td> 1.046469</td>\n", | |
" <td>-0.478642</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 1.096200</td>\n", | |
" <td> 0.750286</td>\n", | |
" <td> 0.823056</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 59, | |
"text": [ | |
" 0 1 2\n", | |
"0 0.404052 0.000000 0.000000\n", | |
"1 -0.494410 0.000000 0.000000\n", | |
"2 -0.197636 0.000000 0.000000\n", | |
"3 0.806719 0.000000 0.142528\n", | |
"4 -0.528672 0.000000 -1.702336\n", | |
"5 -0.594223 1.046469 -0.478642\n", | |
"6 1.096200 0.750286 0.823056" | |
] | |
} | |
], | |
"prompt_number": 59 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = DataFrame(np.random.randn(6,3))\n", | |
"df.ix[2:,1] = NA\n", | |
"df.ix[4:,2] = NA\n", | |
"df" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-1.371226</td>\n", | |
" <td> 0.397127</td>\n", | |
" <td>-1.457344</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.535057</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.277766</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 1.041361</td>\n", | |
" <td> NaN</td>\n", | |
" <td>-0.165556</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2.231523</td>\n", | |
" <td> NaN</td>\n", | |
" <td>-0.005304</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.247808</td>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 0.888134</td>\n", | |
" <td> NaN</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 62, | |
"text": [ | |
" 0 1 2\n", | |
"0 -1.371226 0.397127 -1.457344\n", | |
"1 -0.535057 -0.816477 -0.277766\n", | |
"2 1.041361 NaN -0.165556\n", | |
"3 2.231523 NaN -0.005304\n", | |
"4 -0.247808 NaN NaN\n", | |
"5 0.888134 NaN NaN" | |
] | |
} | |
], | |
"prompt_number": 62 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.fillna(method='ffill')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-1.371226</td>\n", | |
" <td> 0.397127</td>\n", | |
" <td>-1.457344</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>-0.535057</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.277766</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 1.041361</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.165556</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 2.231523</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.005304</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-0.247808</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.005304</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 0.888134</td>\n", | |
" <td>-0.816477</td>\n", | |
" <td>-0.005304</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 63, | |
"text": [ | |
" 0 1 2\n", | |
"0 -1.371226 0.397127 -1.457344\n", | |
"1 -0.535057 -0.816477 -0.277766\n", | |
"2 1.041361 -0.816477 -0.165556\n", | |
"3 2.231523 -0.816477 -0.005304\n", | |
"4 -0.247808 -0.816477 -0.005304\n", | |
"5 0.888134 -0.816477 -0.005304" | |
] | |
} | |
], | |
"prompt_number": 63 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = Series([1,NA,3.5,NA,7,3.5])\n", | |
"data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 72, | |
"text": [ | |
"0 1.0\n", | |
"1 NaN\n", | |
"2 3.5\n", | |
"3 NaN\n", | |
"4 7.0\n", | |
"5 3.5\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 72 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.fillna(data.mean())" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 73, | |
"text": [ | |
"0 1.00\n", | |
"1 3.75\n", | |
"2 3.50\n", | |
"3 3.75\n", | |
"4 7.00\n", | |
"5 3.50\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 73 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.fillna(data.value_counts().index[0])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 75, | |
"text": [ | |
"0 1.0\n", | |
"1 3.5\n", | |
"2 3.5\n", | |
"3 3.5\n", | |
"4 7.0\n", | |
"5 3.5\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 75 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Hierarchical Indexing\n", | |
"\n", | |
"Hierarchical indexing is an important feature of pandas enabling you to have multiple (two or more) index levels on an axis. Somewhat abstractly, it provides a way for you to work with higher dimensional data in a lower dimensional form. Let\u2019s start with a simple example; create a Series with a list of lists or arrays as the index:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data = Series(np.random.randn(10),\n", | |
" index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],\n", | |
" [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])\n", | |
"data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 77, | |
"text": [ | |
"a 1 -0.949916\n", | |
" 2 -0.812244\n", | |
" 3 0.840604\n", | |
"b 1 1.197306\n", | |
" 2 -0.204776\n", | |
" 3 -1.448452\n", | |
"c 1 0.202745\n", | |
" 2 0.938151\n", | |
"d 2 -1.122258\n", | |
" 3 0.214271\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 77 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.index" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 80, | |
"text": [ | |
"MultiIndex\n", | |
"[a 1, 2, 3, b 1, 2, 3, c 1, 2, d 2, 3]" | |
] | |
} | |
], | |
"prompt_number": 80 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data['b']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 81, | |
"text": [ | |
"1 1.197306\n", | |
"2 -0.204776\n", | |
"3 -1.448452\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 81 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.unstack()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <td>-0.949916</td>\n", | |
" <td>-0.812244</td>\n", | |
" <td> 0.840604</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 1.197306</td>\n", | |
" <td>-0.204776</td>\n", | |
" <td>-1.448452</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <td> 0.202745</td>\n", | |
" <td> 0.938151</td>\n", | |
" <td> NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <td> NaN</td>\n", | |
" <td>-1.122258</td>\n", | |
" <td> 0.214271</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 82, | |
"text": [ | |
" 1 2 3\n", | |
"a -0.949916 -0.812244 0.840604\n", | |
"b 1.197306 -0.204776 -1.448452\n", | |
"c 0.202745 0.938151 NaN\n", | |
"d NaN -1.122258 0.214271" | |
] | |
} | |
], | |
"prompt_number": 82 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"data.unstack().stack()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 83, | |
"text": [ | |
"a 1 -0.949916\n", | |
" 2 -0.812244\n", | |
" 3 0.840604\n", | |
"b 1 1.197306\n", | |
" 2 -0.204776\n", | |
" 3 -1.448452\n", | |
"c 1 0.202745\n", | |
" 2 0.938151\n", | |
"d 2 -1.122258\n", | |
" 3 0.214271\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 83 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.arange(12).reshape((4, 3)),\n", | |
" index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]], \n", | |
" columns=[['Ohio', 'Ohio', 'Colorado'],\n", | |
" ['Green', 'Red', 'Green']])\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">a</th>\n", | |
" <th>1</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">b</th>\n", | |
" <th>1</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 84, | |
"text": [ | |
" Ohio Colorado\n", | |
" Green Red Green\n", | |
"a 1 0 1 2\n", | |
" 2 3 4 5\n", | |
"b 1 6 7 8\n", | |
" 2 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 84 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.index.names = ['key1', 'key2']\n", | |
"frame.columns.names = ['state', 'color']\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">a</th>\n", | |
" <th>1</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">b</th>\n", | |
" <th>1</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 87, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key1 key2 \n", | |
"a 1 0 1 2\n", | |
" 2 3 4 5\n", | |
"b 1 6 7 8\n", | |
" 2 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 87 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame['Ohio']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">a</th>\n", | |
" <th>1</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">b</th>\n", | |
" <th>1</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 88, | |
"text": [ | |
"color Green Red\n", | |
"key1 key2 \n", | |
"a 1 0 1\n", | |
" 2 3 4\n", | |
"b 1 6 7\n", | |
" 2 9 10" | |
] | |
} | |
], | |
"prompt_number": 88 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame['Ohio'].ix['a']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 99, | |
"text": [ | |
"color Green Red\n", | |
"key2 \n", | |
"1 0 1\n", | |
"2 3 4" | |
] | |
} | |
], | |
"prompt_number": 99 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Reordering and Sorting Levels\n", | |
"\n", | |
"At times you will need to rearrange the order of the levels on an axis or sort the data by the values in one specific level. The swaplevel takes two level numbers or names and returns a new object with the levels interchanged (but the data is otherwise unaltered):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.swaplevel('key1','key2')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key2</th>\n", | |
" <th>key1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <th>a</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <th>b</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <th>b</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 100, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key2 key1 \n", | |
"1 a 0 1 2\n", | |
"2 a 3 4 5\n", | |
"1 b 6 7 8\n", | |
"2 b 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 100 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sortlevel(1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <th>1</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <th>1</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>a</th>\n", | |
" <th>2</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <th>2</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 101, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key1 key2 \n", | |
"a 1 0 1 2\n", | |
"b 1 6 7 8\n", | |
"a 2 3 4 5\n", | |
"b 2 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 101 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.swaplevel(0,1).sortlevel(0)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key2</th>\n", | |
" <th>key1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">1</th>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">2</th>\n", | |
" <th>a</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>b</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 107, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key2 key1 \n", | |
"1 a 0 1 2\n", | |
" b 6 7 8\n", | |
"2 a 3 4 5\n", | |
" b 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 107 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.swaplevel(0,1).sortlevel(1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key2</th>\n", | |
" <th>key1</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <th>a</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <th>a</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <th>b</th>\n", | |
" <td> 6</td>\n", | |
" <td> 7</td>\n", | |
" <td> 8</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <th>b</th>\n", | |
" <td> 9</td>\n", | |
" <td> 10</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 108, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key2 key1 \n", | |
"1 a 0 1 2\n", | |
"2 a 3 4 5\n", | |
"1 b 6 7 8\n", | |
"2 b 9 10 11" | |
] | |
} | |
], | |
"prompt_number": 108 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Summary Statistics by Level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sum(level='key2')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th>state</th>\n", | |
" <th colspan=\"2\" halign=\"left\">Ohio</th>\n", | |
" <th>Colorado</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" <th>Green</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 6</td>\n", | |
" <td> 8</td>\n", | |
" <td> 10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 12</td>\n", | |
" <td> 14</td>\n", | |
" <td> 16</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 109, | |
"text": [ | |
"state Ohio Colorado\n", | |
"color Green Red Green\n", | |
"key2 \n", | |
"1 6 8 10\n", | |
"2 12 14 16" | |
] | |
} | |
], | |
"prompt_number": 109 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.sum(level='color', axis=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>Green</th>\n", | |
" <th>Red</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>key1</th>\n", | |
" <th>key2</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">a</th>\n", | |
" <th>1</th>\n", | |
" <td> 2</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 8</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"2\" valign=\"top\">b</th>\n", | |
" <th>1</th>\n", | |
" <td> 14</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 20</td>\n", | |
" <td> 10</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 110, | |
"text": [ | |
"color Green Red\n", | |
"key1 key2 \n", | |
"a 1 2 1\n", | |
" 2 8 4\n", | |
"b 1 14 7\n", | |
" 2 20 10" | |
] | |
} | |
], | |
"prompt_number": 110 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Using a DataFrame's Columns " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame({'a': range(7), 'b': range(7, 0, -1),\n", | |
" 'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],\n", | |
" 'd': [0, 1, 2, 0, 1, 2, 3]})\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 7</td>\n", | |
" <td> one</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 6</td>\n", | |
" <td> one</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2</td>\n", | |
" <td> 5</td>\n", | |
" <td> one</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> two</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 4</td>\n", | |
" <td> 3</td>\n", | |
" <td> two</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 5</td>\n", | |
" <td> 2</td>\n", | |
" <td> two</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 6</td>\n", | |
" <td> 1</td>\n", | |
" <td> two</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 111, | |
"text": [ | |
" a b c d\n", | |
"0 0 7 one 0\n", | |
"1 1 6 one 1\n", | |
"2 2 5 one 2\n", | |
"3 3 4 two 0\n", | |
"4 4 3 two 1\n", | |
"5 5 2 two 2\n", | |
"6 6 1 two 3" | |
] | |
} | |
], | |
"prompt_number": 111 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2 = frame.set_index(['c','d'])\n", | |
"frame2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"3\" valign=\"top\">one</th>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">two</th>\n", | |
" <th>0</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 4</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 5</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 6</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 112, | |
"text": [ | |
" a b\n", | |
"c d \n", | |
"one 0 0 7\n", | |
" 1 1 6\n", | |
" 2 2 5\n", | |
"two 0 3 4\n", | |
" 1 4 3\n", | |
" 2 5 2\n", | |
" 3 6 1" | |
] | |
} | |
], | |
"prompt_number": 112 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.set_index(['c','d'], drop=False)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"3\" valign=\"top\">one</th>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 7</td>\n", | |
" <td> one</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 6</td>\n", | |
" <td> one</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2</td>\n", | |
" <td> 5</td>\n", | |
" <td> one</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">two</th>\n", | |
" <th>0</th>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" <td> two</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 4</td>\n", | |
" <td> 3</td>\n", | |
" <td> two</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 5</td>\n", | |
" <td> 2</td>\n", | |
" <td> two</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 6</td>\n", | |
" <td> 1</td>\n", | |
" <td> two</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 113, | |
"text": [ | |
" a b c d\n", | |
"c d \n", | |
"one 0 0 7 one 0\n", | |
" 1 1 6 one 1\n", | |
" 2 2 5 one 2\n", | |
"two 0 3 4 two 0\n", | |
" 1 4 3 two 1\n", | |
" 2 5 2 two 2\n", | |
" 3 6 1 two 3" | |
] | |
} | |
], | |
"prompt_number": 113 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame2.reset_index()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>c</th>\n", | |
" <th>d</th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> one</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> one</td>\n", | |
" <td> 1</td>\n", | |
" <td> 1</td>\n", | |
" <td> 6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> one</td>\n", | |
" <td> 2</td>\n", | |
" <td> 2</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> two</td>\n", | |
" <td> 0</td>\n", | |
" <td> 3</td>\n", | |
" <td> 4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> two</td>\n", | |
" <td> 1</td>\n", | |
" <td> 4</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> two</td>\n", | |
" <td> 2</td>\n", | |
" <td> 5</td>\n", | |
" <td> 2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> two</td>\n", | |
" <td> 3</td>\n", | |
" <td> 6</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 115, | |
"text": [ | |
" c d a b\n", | |
"0 one 0 0 7\n", | |
"1 one 1 1 6\n", | |
"2 one 2 2 5\n", | |
"3 two 0 3 4\n", | |
"4 two 1 4 3\n", | |
"5 two 2 5 2\n", | |
"6 two 3 6 1" | |
] | |
} | |
], | |
"prompt_number": 115 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Other pandas Topics" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Integer Indexing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ser = Series(np.arange(3))\n", | |
"ser" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 117, | |
"text": [ | |
"0 0\n", | |
"1 1\n", | |
"2 2\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 117 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ser2 = Series(np.arange(3), index=['a','b','c'])\n", | |
"ser2[-1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 119, | |
"text": [ | |
"2" | |
] | |
} | |
], | |
"prompt_number": 119 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ser.ix[:1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 122, | |
"text": [ | |
"0 0\n", | |
"1 1\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 122 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ser3 = Series(range(3), index=[-5,1,3])\n", | |
"print ser3\n", | |
"print ser3.iget_value(0)\n", | |
"print ser3.iget_value(1)\n", | |
"print ser3.iget_value(2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"-5 0\n", | |
" 1 1\n", | |
" 3 2\n", | |
"dtype: int64\n", | |
"0\n", | |
"1\n", | |
"2\n" | |
] | |
} | |
], | |
"prompt_number": 132 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame = DataFrame(np.arange(6).reshape(3,2), index=[2,0,1])\n", | |
"frame" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 0</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 2</td>\n", | |
" <td> 3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 4</td>\n", | |
" <td> 5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 126, | |
"text": [ | |
" 0 1\n", | |
"2 0 1\n", | |
"0 2 3\n", | |
"1 4 5" | |
] | |
} | |
], | |
"prompt_number": 126 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"frame.irow(1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 128, | |
"text": [ | |
"0 2\n", | |
"1 3\n", | |
"Name: 0, dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 128 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Panel Data " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas.io.data as web\n", | |
"pdata = pd.Panel(dict((stk, web.get_data_yahoo(stk, '1/1/2009', '6/1/2012')) \n", | |
" for stk in ['AAPL', 'GOOG', 'MSFT', 'DELL']))\n", | |
"pdata" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 135, | |
"text": [ | |
"<class 'pandas.core.panel.Panel'>\n", | |
"Dimensions: 4 (items) x 861 (major_axis) x 6 (minor_axis)\n", | |
"Items axis: AAPL to MSFT\n", | |
"Major_axis axis: 2009-01-02 00:00:00 to 2012-06-01 00:00:00\n", | |
"Minor_axis axis: Open to Adj Close" | |
] | |
} | |
], | |
"prompt_number": 135 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pdata = pdata.swapaxes('items', 'minor')\n", | |
"pdata['Adj Close']" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<pre>\n", | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 861 entries, 2009-01-02 00:00:00 to 2012-06-01 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 861 non-null values\n", | |
"DELL 861 non-null values\n", | |
"GOOG 861 non-null values\n", | |
"MSFT 861 non-null values\n", | |
"dtypes: float64(4)\n", | |
"</pre>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 136, | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"DatetimeIndex: 861 entries, 2009-01-02 00:00:00 to 2012-06-01 00:00:00\n", | |
"Data columns (total 4 columns):\n", | |
"AAPL 861 non-null values\n", | |
"DELL 861 non-null values\n", | |
"GOOG 861 non-null values\n", | |
"MSFT 861 non-null values\n", | |
"dtypes: float64(4)" | |
] | |
} | |
], | |
"prompt_number": 136 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pdata.ix[:, '6/1/2012', :]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>AAPL</th>\n", | |
" <td> 569.16</td>\n", | |
" <td> 572.65</td>\n", | |
" <td> 560.52</td>\n", | |
" <td> 560.99</td>\n", | |
" <td> 18606700</td>\n", | |
" <td> 545.59</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>DELL</th>\n", | |
" <td> 12.15</td>\n", | |
" <td> 12.30</td>\n", | |
" <td> 12.05</td>\n", | |
" <td> 12.07</td>\n", | |
" <td> 19396700</td>\n", | |
" <td> 11.74</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 571.79</td>\n", | |
" <td> 572.65</td>\n", | |
" <td> 568.35</td>\n", | |
" <td> 570.98</td>\n", | |
" <td> 3057900</td>\n", | |
" <td> 570.98</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 28.76</td>\n", | |
" <td> 28.96</td>\n", | |
" <td> 28.44</td>\n", | |
" <td> 28.45</td>\n", | |
" <td> 56634300</td>\n", | |
" <td> 27.42</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 137, | |
"text": [ | |
" Open High Low Close Volume Adj Close\n", | |
"AAPL 569.16 572.65 560.52 560.99 18606700 545.59\n", | |
"DELL 12.15 12.30 12.05 12.07 19396700 11.74\n", | |
"GOOG 571.79 572.65 568.35 570.98 3057900 570.98\n", | |
"MSFT 28.76 28.96 28.44 28.45 56634300 27.42" | |
] | |
} | |
], | |
"prompt_number": 137 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"pdata.ix['Adj Close', '5/22/2012':, :]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>AAPL</th>\n", | |
" <th>DELL</th>\n", | |
" <th>GOOG</th>\n", | |
" <th>MSFT</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2012-05-22</th>\n", | |
" <td> 541.68</td>\n", | |
" <td> 14.67</td>\n", | |
" <td> 600.80</td>\n", | |
" <td> 28.68</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-23</th>\n", | |
" <td> 554.90</td>\n", | |
" <td> 12.15</td>\n", | |
" <td> 609.46</td>\n", | |
" <td> 28.05</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-24</th>\n", | |
" <td> 549.80</td>\n", | |
" <td> 12.11</td>\n", | |
" <td> 603.66</td>\n", | |
" <td> 28.01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-25</th>\n", | |
" <td> 546.86</td>\n", | |
" <td> 12.12</td>\n", | |
" <td> 591.53</td>\n", | |
" <td> 28.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-29</th>\n", | |
" <td> 556.56</td>\n", | |
" <td> 12.32</td>\n", | |
" <td> 594.34</td>\n", | |
" <td> 28.49</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-30</th>\n", | |
" <td> 563.27</td>\n", | |
" <td> 12.22</td>\n", | |
" <td> 588.23</td>\n", | |
" <td> 28.27</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-05-31</th>\n", | |
" <td> 561.87</td>\n", | |
" <td> 12.00</td>\n", | |
" <td> 580.86</td>\n", | |
" <td> 28.13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2012-06-01</th>\n", | |
" <td> 545.59</td>\n", | |
" <td> 11.74</td>\n", | |
" <td> 570.98</td>\n", | |
" <td> 27.42</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 138, | |
"text": [ | |
" AAPL DELL GOOG MSFT\n", | |
"Date \n", | |
"2012-05-22 541.68 14.67 600.80 28.68\n", | |
"2012-05-23 554.90 12.15 609.46 28.05\n", | |
"2012-05-24 549.80 12.11 603.66 28.01\n", | |
"2012-05-25 546.86 12.12 591.53 28.00\n", | |
"2012-05-29 556.56 12.32 594.34 28.49\n", | |
"2012-05-30 563.27 12.22 588.23 28.27\n", | |
"2012-05-31 561.87 12.00 580.86 28.13\n", | |
"2012-06-01 545.59 11.74 570.98 27.42" | |
] | |
} | |
], | |
"prompt_number": 138 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"stacked = pdata.ix[:, '5/30/2012':, :].to_frame()\n", | |
"stacked" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th>minor</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">2012-05-30</th>\n", | |
" <th>AAPL</th>\n", | |
" <td> 569.20</td>\n", | |
" <td> 579.99</td>\n", | |
" <td> 566.56</td>\n", | |
" <td> 579.17</td>\n", | |
" <td> 18908200</td>\n", | |
" <td> 563.27</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>DELL</th>\n", | |
" <td> 12.59</td>\n", | |
" <td> 12.70</td>\n", | |
" <td> 12.46</td>\n", | |
" <td> 12.56</td>\n", | |
" <td> 19787800</td>\n", | |
" <td> 12.22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 588.16</td>\n", | |
" <td> 591.90</td>\n", | |
" <td> 583.53</td>\n", | |
" <td> 588.23</td>\n", | |
" <td> 1906700</td>\n", | |
" <td> 588.23</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 29.35</td>\n", | |
" <td> 29.48</td>\n", | |
" <td> 29.12</td>\n", | |
" <td> 29.34</td>\n", | |
" <td> 41585500</td>\n", | |
" <td> 28.27</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">2012-05-31</th>\n", | |
" <th>AAPL</th>\n", | |
" <td> 580.74</td>\n", | |
" <td> 581.50</td>\n", | |
" <td> 571.46</td>\n", | |
" <td> 577.73</td>\n", | |
" <td> 17559800</td>\n", | |
" <td> 561.87</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>DELL</th>\n", | |
" <td> 12.53</td>\n", | |
" <td> 12.54</td>\n", | |
" <td> 12.33</td>\n", | |
" <td> 12.33</td>\n", | |
" <td> 19955500</td>\n", | |
" <td> 12.00</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 588.72</td>\n", | |
" <td> 590.00</td>\n", | |
" <td> 579.00</td>\n", | |
" <td> 580.86</td>\n", | |
" <td> 2968300</td>\n", | |
" <td> 580.86</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 29.30</td>\n", | |
" <td> 29.42</td>\n", | |
" <td> 28.94</td>\n", | |
" <td> 29.19</td>\n", | |
" <td> 39134000</td>\n", | |
" <td> 28.13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">2012-06-01</th>\n", | |
" <th>AAPL</th>\n", | |
" <td> 569.16</td>\n", | |
" <td> 572.65</td>\n", | |
" <td> 560.52</td>\n", | |
" <td> 560.99</td>\n", | |
" <td> 18606700</td>\n", | |
" <td> 545.59</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>DELL</th>\n", | |
" <td> 12.15</td>\n", | |
" <td> 12.30</td>\n", | |
" <td> 12.05</td>\n", | |
" <td> 12.07</td>\n", | |
" <td> 19396700</td>\n", | |
" <td> 11.74</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>GOOG</th>\n", | |
" <td> 571.79</td>\n", | |
" <td> 572.65</td>\n", | |
" <td> 568.35</td>\n", | |
" <td> 570.98</td>\n", | |
" <td> 3057900</td>\n", | |
" <td> 570.98</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>MSFT</th>\n", | |
" <td> 28.76</td>\n", | |
" <td> 28.96</td>\n", | |
" <td> 28.44</td>\n", | |
" <td> 28.45</td>\n", | |
" <td> 56634300</td>\n", | |
" <td> 27.42</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 139, | |
"text": [ | |
" Open High Low Close Volume Adj Close\n", | |
"Date minor \n", | |
"2012-05-30 AAPL 569.20 579.99 566.56 579.17 18908200 563.27\n", | |
" DELL 12.59 12.70 12.46 12.56 19787800 12.22\n", | |
" GOOG 588.16 591.90 583.53 588.23 1906700 588.23\n", | |
" MSFT 29.35 29.48 29.12 29.34 41585500 28.27\n", | |
"2012-05-31 AAPL 580.74 581.50 571.46 577.73 17559800 561.87\n", | |
" DELL 12.53 12.54 12.33 12.33 19955500 12.00\n", | |
" GOOG 588.72 590.00 579.00 580.86 2968300 580.86\n", | |
" MSFT 29.30 29.42 28.94 29.19 39134000 28.13\n", | |
"2012-06-01 AAPL 569.16 572.65 560.52 560.99 18606700 545.59\n", | |
" DELL 12.15 12.30 12.05 12.07 19396700 11.74\n", | |
" GOOG 571.79 572.65 568.35 570.98 3057900 570.98\n", | |
" MSFT 28.76 28.96 28.44 28.45 56634300 27.42" | |
] | |
} | |
], | |
"prompt_number": 139 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"stacked.to_panel()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 140, | |
"text": [ | |
"<class 'pandas.core.panel.Panel'>\n", | |
"Dimensions: 6 (items) x 3 (major_axis) x 4 (minor_axis)\n", | |
"Items axis: Open to Adj Close\n", | |
"Major_axis axis: 2012-05-30 00:00:00 to 2012-06-01 00:00:00\n", | |
"Minor_axis axis: AAPL to MSFT" | |
] | |
} | |
], | |
"prompt_number": 140 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment