Skip to content

Instantly share code, notes, and snippets.

@jgoad
Created June 19, 2014 16:42
Show Gist options
  • Save jgoad/847c69fcc1890412b4a9 to your computer and use it in GitHub Desktop.
Save jgoad/847c69fcc1890412b4a9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:09d229521d319637ce437142ac1150a2886677885569eead01385dd8f3840466"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Reading and Writing Data in Text Format"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import sys\n",
"from pandas import Series, DataFrame\n",
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex1.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a,b,c,d,message\r\n",
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.read_csv('ch06/ex1.csv')\n",
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.read_table('ch06/ex1.csv', sep = ',') # pass the comma delimiter explicitly; read_table defaults to tab\n",
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex2.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex2.csv') # This file has no header row, but pandas treats the first line as the header by default, so the first data row becomes the column names. Pass header=None to prevent that."
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>hello</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
" 1 2 3 4 hello\n",
"0 5 6 7 8 world\n",
"1 9 10 11 12 foo"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex2.csv', header = None) "
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
" 0 1 2 3 4\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"names = ['a','b','c','d','message']\n",
"pd.read_csv('ch06/ex2.csv', names = names, index_col = 'message') # Supply the column names explicitly and use the 'message' column as the index."
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" <tr>\n",
" <th>message</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>hello</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>world</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>foo</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
" a b c d\n",
"message \n",
"hello 1 2 3 4\n",
"world 5 6 7 8\n",
"foo 9 10 11 12"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/csv_mindex.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"key1,key2,value1,value2\r\n",
"one,a,1,2\r\n",
"one,b,3,4\r\n",
"one,c,5,6\r\n",
"one,d,7,8\r\n",
"two,a,9,10\r\n",
"two,b,11,12\r\n",
"two,c,13,14\r\n",
"two,d,15,16\r\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"parsed = pd.read_csv('ch06/csv_mindex.csv', index_col = ['key1','key2'])\n",
"parsed"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>value1</th>\n",
" <th>value2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>key1</th>\n",
" <th>key2</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">one</th>\n",
" <th>a</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">two</th>\n",
" <th>a</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td> 13</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td> 15</td>\n",
" <td> 16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
" value1 value2\n",
"key1 key2 \n",
"one a 1 2\n",
" b 3 4\n",
" c 5 6\n",
" d 7 8\n",
"two a 9 10\n",
" b 11 12\n",
" c 13 14\n",
" d 15 16"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"list(open('ch06/ex3.txt'))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"[' A B C\\n',\n",
" 'aaa -0.264438 -1.026059 -0.619500\\n',\n",
" 'bbb 0.927272 0.302904 -0.032399\\n',\n",
" 'ccc -0.264273 -0.386314 -0.217601\\n',\n",
" 'ddd -0.871858 -0.348382 1.100491\\n']"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_table('ch06/ex3.txt', sep = '\\s+')\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>aaa</th>\n",
" <td>-0.264438</td>\n",
" <td>-1.026059</td>\n",
" <td>-0.619500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>bbb</th>\n",
" <td> 0.927272</td>\n",
" <td> 0.302904</td>\n",
" <td>-0.032399</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ccc</th>\n",
" <td>-0.264273</td>\n",
" <td>-0.386314</td>\n",
" <td>-0.217601</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ddd</th>\n",
" <td>-0.871858</td>\n",
" <td>-0.348382</td>\n",
" <td> 1.100491</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
" A B C\n",
"aaa -0.264438 -1.026059 -0.619500\n",
"bbb 0.927272 0.302904 -0.032399\n",
"ccc -0.264273 -0.386314 -0.217601\n",
"ddd -0.871858 -0.348382 1.100491"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex4.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"# hey!\r\n",
"a,b,c,d,message\r\n",
"# just wanted to make things more difficult for you\r\n",
"# who reads CSV files with computers, anyway?\r\n",
"1,2,3,4,hello\r\n",
"5,6,7,8,world\r\n",
"9,10,11,12,foo"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex4.csv')\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th># hey!</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <td> message</td>\n",
" </tr>\n",
" <tr>\n",
" <th># just wanted to make things more difficult for you</th>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th># who reads CSV files with computers</th>\n",
" <th> anyway?</th>\n",
" <th>NaN</th>\n",
" <th>NaN</th>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
" # hey!\n",
"a b c d message\n",
"# just wanted to make things more difficult for you NaN NaN NaN NaN\n",
"# who reads CSV files with computers anyway? NaN NaN NaN\n",
"1 2 3 4 hello\n",
"5 6 7 8 world\n",
"9 10 11 12 foo"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex4.csv',skiprows=[0,2,3]) # Skip the comment lines (rows 0, 2 and 3) by position. What if there were too many such rows to list by hand?"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex5.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"something,a,b,c,d,message\r\n",
"one,1,2,3,4,NA\r\n",
"two,5,6,,8,world\r\n",
"three,9,10,11,12,foo"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex5.csv')\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>something</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> one</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> two</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> three</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 17,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result.isnull()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>something</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> True</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" <td> False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
" something a b c d message\n",
"0 False False False False False True\n",
"1 False False False True False False\n",
"2 False False False False False False"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex5.csv',na_values = ['NULL'])\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>something</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> one</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> two</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> three</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"sentinels = {'message': ['foo','NA'],'something':['two']}\n",
"pd.read_csv('ch06/ex5.csv',na_values = sentinels) # Per-column sentinels: the values listed for each column in 'sentinels' are read as NaN."
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>something</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> one</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> NaN</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> three</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 NaN 5 6 NaN 8 world\n",
"2 three 9 10 11 12 NaN"
]
}
],
"prompt_number": 20
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Reading Text Files in Pieces"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = pd.read_csv('ch06/ex6.csv')\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" <th>key</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> 0.467976</td>\n",
" <td>-0.038649</td>\n",
" <td>-0.295344</td>\n",
" <td>-1.824726</td>\n",
" <td> L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td>-0.358893</td>\n",
" <td> 1.404453</td>\n",
" <td> 0.704965</td>\n",
" <td>-0.200638</td>\n",
" <td> B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td>-0.501840</td>\n",
" <td> 0.659254</td>\n",
" <td>-0.421691</td>\n",
" <td>-0.057688</td>\n",
" <td> G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> 0.204886</td>\n",
" <td> 1.074134</td>\n",
" <td> 1.388361</td>\n",
" <td>-0.982404</td>\n",
" <td> R</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> 0.354628</td>\n",
" <td>-0.133116</td>\n",
" <td> 0.283763</td>\n",
" <td>-0.837063</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> 1.817480</td>\n",
" <td> 0.742273</td>\n",
" <td> 0.419395</td>\n",
" <td>-2.251035</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td>-0.776764</td>\n",
" <td> 0.935518</td>\n",
" <td>-0.332872</td>\n",
" <td>-1.875641</td>\n",
" <td> U</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td>-0.913135</td>\n",
" <td> 1.530624</td>\n",
" <td>-0.572657</td>\n",
" <td> 0.477252</td>\n",
" <td> K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> 0.358480</td>\n",
" <td>-0.497572</td>\n",
" <td>-0.367016</td>\n",
" <td> 0.507702</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td>-1.740877</td>\n",
" <td>-1.160417</td>\n",
" <td>-1.637830</td>\n",
" <td> 2.172201</td>\n",
" <td> G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10 </th>\n",
" <td> 0.240564</td>\n",
" <td>-0.328249</td>\n",
" <td> 1.252155</td>\n",
" <td> 1.072796</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11 </th>\n",
" <td> 0.764018</td>\n",
" <td> 1.165476</td>\n",
" <td>-0.639544</td>\n",
" <td> 1.495258</td>\n",
" <td> R</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12 </th>\n",
" <td> 0.571035</td>\n",
" <td>-0.310537</td>\n",
" <td> 0.582437</td>\n",
" <td>-0.298765</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13 </th>\n",
" <td> 2.317658</td>\n",
" <td> 0.430710</td>\n",
" <td>-1.334216</td>\n",
" <td> 0.199679</td>\n",
" <td> P</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14 </th>\n",
" <td> 1.547771</td>\n",
" <td>-1.119753</td>\n",
" <td>-2.277634</td>\n",
" <td> 0.329586</td>\n",
" <td> J</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15 </th>\n",
" <td>-1.310608</td>\n",
" <td> 0.401719</td>\n",
" <td>-1.000987</td>\n",
" <td> 1.156708</td>\n",
" <td> E</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16 </th>\n",
" <td>-0.088496</td>\n",
" <td> 0.634712</td>\n",
" <td> 0.153324</td>\n",
" <td> 0.415335</td>\n",
" <td> B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17 </th>\n",
" <td>-0.018663</td>\n",
" <td>-0.247487</td>\n",
" <td>-1.446522</td>\n",
" <td> 0.750938</td>\n",
" <td> A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18 </th>\n",
" <td>-0.070127</td>\n",
" <td>-1.579097</td>\n",
" <td> 0.120892</td>\n",
" <td> 0.671432</td>\n",
" <td> F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19 </th>\n",
" <td>-0.194678</td>\n",
" <td>-0.492039</td>\n",
" <td> 2.359605</td>\n",
" <td> 0.319810</td>\n",
" <td> H</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20 </th>\n",
" <td>-0.248618</td>\n",
" <td> 0.868707</td>\n",
" <td>-0.492226</td>\n",
" <td>-0.717959</td>\n",
" <td> W</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21 </th>\n",
" <td>-1.091549</td>\n",
" <td>-0.867110</td>\n",
" <td>-0.647760</td>\n",
" <td>-0.832562</td>\n",
" <td> C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22 </th>\n",
" <td> 0.641404</td>\n",
" <td>-0.138822</td>\n",
" <td>-0.621963</td>\n",
" <td>-0.284839</td>\n",
" <td> C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23 </th>\n",
" <td> 1.216408</td>\n",
" <td> 0.992687</td>\n",
" <td> 0.165162</td>\n",
" <td>-0.069619</td>\n",
" <td> V</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24 </th>\n",
" <td>-0.564474</td>\n",
" <td> 0.792832</td>\n",
" <td> 0.747053</td>\n",
" <td> 0.571675</td>\n",
" <td> I</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25 </th>\n",
" <td> 1.759879</td>\n",
" <td>-0.515666</td>\n",
" <td>-0.230481</td>\n",
" <td> 1.362317</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26 </th>\n",
" <td> 0.126266</td>\n",
" <td> 0.309281</td>\n",
" <td> 0.382820</td>\n",
" <td>-0.239199</td>\n",
" <td> L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27 </th>\n",
" <td> 1.334360</td>\n",
" <td>-0.100152</td>\n",
" <td>-0.840731</td>\n",
" <td>-0.643967</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28 </th>\n",
" <td>-0.737620</td>\n",
" <td> 0.278087</td>\n",
" <td>-0.053235</td>\n",
" <td>-0.950972</td>\n",
" <td> J</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29 </th>\n",
" <td>-1.148486</td>\n",
" <td>-0.986292</td>\n",
" <td>-0.144963</td>\n",
" <td> 0.124362</td>\n",
" <td> Y</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9970</th>\n",
" <td> 0.633495</td>\n",
" <td>-0.186524</td>\n",
" <td> 0.927627</td>\n",
" <td> 0.143164</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9971</th>\n",
" <td> 0.308636</td>\n",
" <td>-0.112857</td>\n",
" <td> 0.762842</td>\n",
" <td>-1.072977</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9972</th>\n",
" <td>-1.627051</td>\n",
" <td>-0.978151</td>\n",
" <td> 0.154745</td>\n",
" <td>-1.229037</td>\n",
" <td> Z</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9973</th>\n",
" <td> 0.314847</td>\n",
" <td> 0.097989</td>\n",
" <td> 0.199608</td>\n",
" <td> 0.955193</td>\n",
" <td> P</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9974</th>\n",
" <td> 1.666907</td>\n",
" <td> 0.992005</td>\n",
" <td> 0.496128</td>\n",
" <td>-0.686391</td>\n",
" <td> S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9975</th>\n",
" <td> 0.010603</td>\n",
" <td> 0.708540</td>\n",
" <td>-1.258711</td>\n",
" <td> 0.226541</td>\n",
" <td> K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9976</th>\n",
" <td> 0.118693</td>\n",
" <td>-0.714455</td>\n",
" <td>-0.501342</td>\n",
" <td>-0.254764</td>\n",
" <td> K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9977</th>\n",
" <td> 0.302616</td>\n",
" <td>-2.011527</td>\n",
" <td>-0.628085</td>\n",
" <td> 0.768827</td>\n",
" <td> H</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9978</th>\n",
" <td>-0.098572</td>\n",
" <td> 1.769086</td>\n",
" <td>-0.215027</td>\n",
" <td>-0.053076</td>\n",
" <td> A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9979</th>\n",
" <td>-0.019058</td>\n",
" <td> 1.964994</td>\n",
" <td> 0.738538</td>\n",
" <td>-0.883776</td>\n",
" <td> F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9980</th>\n",
" <td>-0.595349</td>\n",
" <td> 0.001781</td>\n",
" <td>-1.423355</td>\n",
" <td>-1.458477</td>\n",
" <td> M</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9981</th>\n",
" <td> 1.392170</td>\n",
" <td>-1.396560</td>\n",
" <td>-1.425306</td>\n",
" <td>-0.847535</td>\n",
" <td> H</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9982</th>\n",
" <td>-0.896029</td>\n",
" <td>-0.152287</td>\n",
" <td> 1.924483</td>\n",
" <td> 0.365184</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9983</th>\n",
" <td>-2.274642</td>\n",
" <td>-0.901874</td>\n",
" <td> 1.500352</td>\n",
" <td> 0.996541</td>\n",
" <td> N</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9984</th>\n",
" <td>-0.301898</td>\n",
" <td> 1.019906</td>\n",
" <td> 1.102160</td>\n",
" <td> 2.624526</td>\n",
" <td> I</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9985</th>\n",
" <td>-2.548389</td>\n",
" <td>-0.585374</td>\n",
" <td> 1.496201</td>\n",
" <td>-0.718815</td>\n",
" <td> D</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9986</th>\n",
" <td>-0.064588</td>\n",
" <td> 0.759292</td>\n",
" <td>-1.568415</td>\n",
" <td>-0.420933</td>\n",
" <td> E</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9987</th>\n",
" <td>-0.143365</td>\n",
" <td>-1.111760</td>\n",
" <td>-1.815581</td>\n",
" <td> 0.435274</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9988</th>\n",
" <td>-0.070412</td>\n",
" <td>-1.055921</td>\n",
" <td> 0.338017</td>\n",
" <td>-0.440763</td>\n",
" <td> X</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9989</th>\n",
" <td> 0.649148</td>\n",
" <td> 0.994273</td>\n",
" <td>-1.384227</td>\n",
" <td> 0.485120</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9990</th>\n",
" <td>-0.370769</td>\n",
" <td> 0.404356</td>\n",
" <td>-1.051628</td>\n",
" <td>-1.050899</td>\n",
" <td> 8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9991</th>\n",
" <td>-0.409980</td>\n",
" <td> 0.155627</td>\n",
" <td>-0.818990</td>\n",
" <td> 1.277350</td>\n",
" <td> W</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9992</th>\n",
" <td> 0.301214</td>\n",
" <td>-1.111203</td>\n",
" <td> 0.668258</td>\n",
" <td> 0.671922</td>\n",
" <td> A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9993</th>\n",
" <td> 1.821117</td>\n",
" <td> 0.416445</td>\n",
" <td> 0.173874</td>\n",
" <td> 0.505118</td>\n",
" <td> X</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9994</th>\n",
" <td> 0.068804</td>\n",
" <td> 1.322759</td>\n",
" <td> 0.802346</td>\n",
" <td> 0.223618</td>\n",
" <td> H</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9995</th>\n",
" <td> 2.311896</td>\n",
" <td>-0.417070</td>\n",
" <td>-1.409599</td>\n",
" <td>-0.515821</td>\n",
" <td> L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>-0.479893</td>\n",
" <td>-0.650419</td>\n",
" <td> 0.745152</td>\n",
" <td>-0.646038</td>\n",
" <td> E</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td> 0.523331</td>\n",
" <td> 0.787112</td>\n",
" <td> 0.486066</td>\n",
" <td> 1.093156</td>\n",
" <td> K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>-0.362559</td>\n",
" <td> 0.598894</td>\n",
" <td>-1.843201</td>\n",
" <td> 0.887292</td>\n",
" <td> G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>-0.096376</td>\n",
" <td>-1.012999</td>\n",
" <td>-0.657431</td>\n",
" <td>-0.573315</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10000 rows \u00d7 5 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
" one two three four key\n",
"0 0.467976 -0.038649 -0.295344 -1.824726 L\n",
"1 -0.358893 1.404453 0.704965 -0.200638 B\n",
"2 -0.501840 0.659254 -0.421691 -0.057688 G\n",
"3 0.204886 1.074134 1.388361 -0.982404 R\n",
"4 0.354628 -0.133116 0.283763 -0.837063 Q\n",
"5 1.817480 0.742273 0.419395 -2.251035 Q\n",
"6 -0.776764 0.935518 -0.332872 -1.875641 U\n",
"7 -0.913135 1.530624 -0.572657 0.477252 K\n",
"8 0.358480 -0.497572 -0.367016 0.507702 S\n",
"9 -1.740877 -1.160417 -1.637830 2.172201 G\n",
"10 0.240564 -0.328249 1.252155 1.072796 8\n",
"11 0.764018 1.165476 -0.639544 1.495258 R\n",
"12 0.571035 -0.310537 0.582437 -0.298765 1\n",
"13 2.317658 0.430710 -1.334216 0.199679 P\n",
"14 1.547771 -1.119753 -2.277634 0.329586 J\n",
"15 -1.310608 0.401719 -1.000987 1.156708 E\n",
"16 -0.088496 0.634712 0.153324 0.415335 B\n",
"17 -0.018663 -0.247487 -1.446522 0.750938 A\n",
"18 -0.070127 -1.579097 0.120892 0.671432 F\n",
"19 -0.194678 -0.492039 2.359605 0.319810 H\n",
"20 -0.248618 0.868707 -0.492226 -0.717959 W\n",
"21 -1.091549 -0.867110 -0.647760 -0.832562 C\n",
"22 0.641404 -0.138822 -0.621963 -0.284839 C\n",
"23 1.216408 0.992687 0.165162 -0.069619 V\n",
"24 -0.564474 0.792832 0.747053 0.571675 I\n",
"25 1.759879 -0.515666 -0.230481 1.362317 S\n",
"26 0.126266 0.309281 0.382820 -0.239199 L\n",
"27 1.334360 -0.100152 -0.840731 -0.643967 6\n",
"28 -0.737620 0.278087 -0.053235 -0.950972 J\n",
"29 -1.148486 -0.986292 -0.144963 0.124362 Y\n",
"... ... ... ... ... ..\n",
"9970 0.633495 -0.186524 0.927627 0.143164 4\n",
"9971 0.308636 -0.112857 0.762842 -1.072977 1\n",
"9972 -1.627051 -0.978151 0.154745 -1.229037 Z\n",
"9973 0.314847 0.097989 0.199608 0.955193 P\n",
"9974 1.666907 0.992005 0.496128 -0.686391 S\n",
"9975 0.010603 0.708540 -1.258711 0.226541 K\n",
"9976 0.118693 -0.714455 -0.501342 -0.254764 K\n",
"9977 0.302616 -2.011527 -0.628085 0.768827 H\n",
"9978 -0.098572 1.769086 -0.215027 -0.053076 A\n",
"9979 -0.019058 1.964994 0.738538 -0.883776 F\n",
"9980 -0.595349 0.001781 -1.423355 -1.458477 M\n",
"9981 1.392170 -1.396560 -1.425306 -0.847535 H\n",
"9982 -0.896029 -0.152287 1.924483 0.365184 6\n",
"9983 -2.274642 -0.901874 1.500352 0.996541 N\n",
"9984 -0.301898 1.019906 1.102160 2.624526 I\n",
"9985 -2.548389 -0.585374 1.496201 -0.718815 D\n",
"9986 -0.064588 0.759292 -1.568415 -0.420933 E\n",
"9987 -0.143365 -1.111760 -1.815581 0.435274 2\n",
"9988 -0.070412 -1.055921 0.338017 -0.440763 X\n",
"9989 0.649148 0.994273 -1.384227 0.485120 Q\n",
"9990 -0.370769 0.404356 -1.051628 -1.050899 8\n",
"9991 -0.409980 0.155627 -0.818990 1.277350 W\n",
"9992 0.301214 -1.111203 0.668258 0.671922 A\n",
"9993 1.821117 0.416445 0.173874 0.505118 X\n",
"9994 0.068804 1.322759 0.802346 0.223618 H\n",
"9995 2.311896 -0.417070 -1.409599 -0.515821 L\n",
"9996 -0.479893 -0.650419 0.745152 -0.646038 E\n",
"9997 0.523331 0.787112 0.486066 1.093156 K\n",
"9998 -0.362559 0.598894 -1.843201 0.887292 G\n",
"9999 -0.096376 -1.012999 -0.657431 -0.573315 0\n",
"\n",
"[10000 rows x 5 columns]"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.read_csv('ch06/ex6.csv',nrows = 5)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>one</th>\n",
" <th>two</th>\n",
" <th>three</th>\n",
" <th>four</th>\n",
" <th>key</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 0.467976</td>\n",
" <td>-0.038649</td>\n",
" <td>-0.295344</td>\n",
" <td>-1.824726</td>\n",
" <td> L</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.358893</td>\n",
" <td> 1.404453</td>\n",
" <td> 0.704965</td>\n",
" <td>-0.200638</td>\n",
" <td> B</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.501840</td>\n",
" <td> 0.659254</td>\n",
" <td>-0.421691</td>\n",
" <td>-0.057688</td>\n",
" <td> G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 0.204886</td>\n",
" <td> 1.074134</td>\n",
" <td> 1.388361</td>\n",
" <td>-0.982404</td>\n",
" <td> R</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 0.354628</td>\n",
" <td>-0.133116</td>\n",
" <td> 0.283763</td>\n",
" <td>-0.837063</td>\n",
" <td> Q</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
" one two three four key\n",
"0 0.467976 -0.038649 -0.295344 -1.824726 L\n",
"1 -0.358893 1.404453 0.704965 -0.200638 B\n",
"2 -0.501840 0.659254 -0.421691 -0.057688 G\n",
"3 0.204886 1.074134 1.388361 -0.982404 R\n",
"4 0.354628 -0.133116 0.283763 -0.837063 Q"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"chunker = pd.read_csv('ch06/ex6.csv', chunksize = 1000)\n",
"chunker"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 23,
"text": [
"<pandas.io.parsers.TextFileReader at 0x108d31a10>"
]
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tot = Series([])\n",
"for piece in chunker:\n",
" tot = tot.add(piece['key'].value_counts(), fill_value = 0)\n",
" tot = tot.order(ascending = False)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tot[:10]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
"E 368\n",
"X 364\n",
"L 346\n",
"O 343\n",
"Q 340\n",
"M 338\n",
"J 337\n",
"F 335\n",
"K 334\n",
"H 330\n",
"dtype: float64"
]
}
],
"prompt_number": 25
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Writing Data Out to Text Format"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = pd.read_csv('ch06/ex5.csv')\n",
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>something</th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> one</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> two</td>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td>NaN</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> three</td>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 26,
"text": [
" something a b c d message\n",
"0 one 1 2 3 4 NaN\n",
"1 two 5 6 NaN 8 world\n",
"2 three 9 10 11 12 foo"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv('ch06/out.csv')\n",
"!cat ch06/out.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
",something,a,b,c,d,message\r\n",
"0,one,1,2,3.0,4,\r\n",
"1,two,5,6,,8,world\r\n",
"2,three,9,10,11.0,12,foo\r\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout, sep = '|') #this just prints what the output would look like"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"|something|a|b|c|d|message\n",
"0|one|1|2|3.0|4|\n",
"1|two|5|6||8|world\n",
"2|three|9|10|11.0|12|foo\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout,na_rep='NULL')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
",something,a,b,c,d,message\n",
"0,one,1,2,3.0,4,NULL\n",
"1,two,5,6,NULL,8,world\n",
"2,three,9,10,11.0,12,foo\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout,index = False, header = False, na_rep = 'NULL')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"one,1,2,3.0,4,NULL\n",
"two,5,6,NULL,8,world\n",
"three,9,10,11.0,12,foo\n"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data.to_csv(sys.stdout,index = False, columns = ['a','b','c'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"a,b,c\n",
"1,2,3.0\n",
"5,6,\n",
"9,10,11.0\n"
]
}
],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dates = pd.date_range('1/1/2000',periods = 7)\n",
"ts = Series(np.arange(7), index = dates)\n",
"ts.to_csv('ch06/tseries.csv')\n",
"!cat ch06/tseries.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"2000-01-01,0\r\n",
"2000-01-02,1\r\n",
"2000-01-03,2\r\n",
"2000-01-04,3\r\n",
"2000-01-05,4\r\n",
"2000-01-06,5\r\n",
"2000-01-07,6\r\n"
]
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"Series.from_csv('ch06/tseries.csv', parse_dates = True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 33,
"text": [
"2000-01-01 0\n",
"2000-01-02 1\n",
"2000-01-03 2\n",
"2000-01-04 3\n",
"2000-01-05 4\n",
"2000-01-06 5\n",
"2000-01-07 6\n",
"dtype: int64"
]
}
],
"prompt_number": 33
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Manually Working with Delimited Formats"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!cat ch06/ex7.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\"a\",\"b\",\"c\"\r\n",
"\"1\",\"2\",\"3\"\r\n",
"\"1\",\"2\",\"3\",\"4\"\r\n"
]
}
],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import csv\n",
"f = open('ch06/ex7.csv')\n",
"reader = csv.reader(f)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for line in reader:\n",
" print line"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['a', 'b', 'c']\n",
"['1', '2', '3']\n",
"['1', '2', '3', '4']\n"
]
}
],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lines = list(csv.reader(open('ch06/ex7.csv')))\n",
"header , values = lines[0], lines[1:]\n",
"data_dict = {h: v for h, v in zip(header, zip(*values))}\n",
"data_dict"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 37,
"text": [
"{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}"
]
}
],
"prompt_number": 37
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class my_dialect(csv.Dialect):\n",
" lineterminator = '\\n'\n",
" delimiter = ';'\n",
" quotechar = '\"'\n",
" quoting = 1 # had to add this in for the code to run. Not in the book.\n",
"reader = csv.reader(f,dialect = my_dialect)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 38
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with open('mydata.csv','w') as f:\n",
" writer = csv.writer(f,dialect=my_dialect)\n",
" writer.writerow(('one','two','three'))\n",
" writer.writerow(('1','2','3'))\n",
" writer.writerow(('4','5','6'))\n",
" writer.writerow(('7','8','9'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 39
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"JSON Data"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"obj = \"\"\"\n",
"{\"name\":\"Wes\",\n",
"\"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n",
"\"pet\":null,\n",
"\"siblings\":[{\"name\": \"Scott\", \"age\":25, \"pet\": \"Zuko\"},\n",
" {\"name\": \"Katie\", \"age\": 33, \"pet\": \"Cisco\"}]\n",
"}\n",
"\"\"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"result = json.loads(obj) # creates a dict from json string obj\n",
"result"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 42,
"text": [
"{u'name': u'Wes',\n",
" u'pet': None,\n",
" u'places_lived': [u'United States', u'Spain', u'Germany'],\n",
" u'siblings': [{u'age': 25, u'name': u'Scott', u'pet': u'Zuko'},\n",
" {u'age': 33, u'name': u'Katie', u'pet': u'Cisco'}]}"
]
}
],
"prompt_number": 42
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"asjson = json.dumps(result) #takes the dict back to json\n",
"asjson"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 43,
"text": [
"'{\"pet\": null, \"siblings\": [{\"pet\": \"Zuko\", \"age\": 25, \"name\": \"Scott\"}, {\"pet\": \"Cisco\", \"age\": 33, \"name\": \"Katie\"}], \"name\": \"Wes\", \"places_lived\": [\"United States\", \"Spain\", \"Germany\"]}'"
]
}
],
"prompt_number": 43
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"siblings = DataFrame(result['siblings'],columns = ['name','age']) #using the JSON data to import into a DataFrame \n",
"siblings"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> Scott</td>\n",
" <td> 25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> Katie</td>\n",
" <td> 33</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 44,
"text": [
" name age\n",
"0 Scott 25\n",
"1 Katie 33"
]
}
],
"prompt_number": 44
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"XML and HTML: Web Scraping"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from lxml.html import parse\n",
"from urllib2 import urlopen\n",
"\n",
"parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))\n",
"doc = parsed.getroot()\n",
"links = doc.findall('.//a')\n",
"links[15:20]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 45,
"text": [
"[<Element a at 0x108da5b50>,\n",
" <Element a at 0x108da5ba8>,\n",
" <Element a at 0x108da5c00>,\n",
" <Element a at 0x108da5c58>,\n",
" <Element a at 0x108da5cb0>]"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk = links[28]\n",
"lnk"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 46,
"text": [
"<Element a at 0x108da5fc8>"
]
}
],
"prompt_number": 46
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk.get('href')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 47,
"text": [
"'https://login.yahoo.com/config/login?.src=quote&.intl=us&.lang=en-US&.done=http://finance.yahoo.com/q/op%3fs=AAPL%2bOptions'"
]
}
],
"prompt_number": 47
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lnk.text_content()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 48,
"text": [
"' Sign In '"
]
}
],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"urls = [lnk.get('href') for lnk in doc.findall('.//a')]\n",
"urls[-10:]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 49,
"text": [
"['/q?s=AAPL140621P00755000',\n",
" '/q/op?s=AAPL&k=760.000000',\n",
" '/q?s=AAPL140621P00760000',\n",
" '/q/op?s=AAPL&k=785.000000',\n",
" '/q?s=AAPL140621P00785000',\n",
" '/q/os?s=AAPL&m=2014-06-27',\n",
" 'http://help.yahoo.com/l/us/yahoo/finance/quotes/fitadelay.html',\n",
" 'http://www.capitaliq.com',\n",
" 'http://www.csidata.com',\n",
" 'http://www.morningstar.com/']"
]
}
],
"prompt_number": 49
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"tables = doc.findall('.//table')\n",
"calls = tables[9]\n",
"puts = tables[13]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rows = calls.findall('.//tr')\n",
"def _unpack(row,kind = 'td'):\n",
" elts = row.findall('.//%s' % kind)\n",
" return [val.text_content() for val in elts]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 51
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"_unpack(rows[0], kind = 'td')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 52,
"text": [
"[]"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(rows)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 53,
"text": [
"424"
]
}
],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pandas.io.parsers import TextParser\n",
"def parse_options_data(table):\n",
" rows = table.findall('.//tr')\n",
" header = _unpack(rows[0], kind = 'th')\n",
" data = [_unpack(r) for r in rows[1:]]\n",
" return TextParser(data, names = header).get_chunk()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"call_data = parse_options_data(calls)\n",
"put_data = parse_options_data(puts)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 55
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"call_data[:10] # no idea how this worked."
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Strike</th>\n",
" <th>Symbol</th>\n",
" <th>Last</th>\n",
" <th>Chg</th>\n",
" <th>Bid</th>\n",
" <th>Ask</th>\n",
" <th>Vol</th>\n",
" <th>Open Int</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 37.86</td>\n",
" <td> AAPL140621C00037860</td>\n",
" <td> 57.40</td>\n",
" <td> 0.00</td>\n",
" <td> 54.05</td>\n",
" <td> 54.50</td>\n",
" <td> 10</td>\n",
" <td> 10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 39.29</td>\n",
" <td> AAPL140621C00039290</td>\n",
" <td> 52.65</td>\n",
" <td> 2.65</td>\n",
" <td> 52.40</td>\n",
" <td> 53.00</td>\n",
" <td> 74</td>\n",
" <td> 9,845</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 40.00</td>\n",
" <td> AAPL140621C00040000</td>\n",
" <td> 51.04</td>\n",
" <td> 0.00</td>\n",
" <td> 51.90</td>\n",
" <td> 52.30</td>\n",
" <td> 0</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 40.71</td>\n",
" <td> AAPL140621C00040710</td>\n",
" <td> 45.65</td>\n",
" <td> 0.00</td>\n",
" <td> 51.15</td>\n",
" <td> 51.60</td>\n",
" <td> 0</td>\n",
" <td> 9,807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 42.86</td>\n",
" <td> AAPL140621C00042860</td>\n",
" <td> 47.07</td>\n",
" <td> 0.00</td>\n",
" <td> 49.05</td>\n",
" <td> 49.50</td>\n",
" <td> 0</td>\n",
" <td> 56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 44.29</td>\n",
" <td> AAPL140621C00044290</td>\n",
" <td> 47.65</td>\n",
" <td> 5.58</td>\n",
" <td> 47.65</td>\n",
" <td> 48.00</td>\n",
" <td> 6</td>\n",
" <td> 1,750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 45.71</td>\n",
" <td> AAPL140621C00045710</td>\n",
" <td> 43.97</td>\n",
" <td> 0.00</td>\n",
" <td> 46.20</td>\n",
" <td> 46.60</td>\n",
" <td> 0</td>\n",
" <td> 1,743</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 47.14</td>\n",
" <td> AAPL140621C00047140</td>\n",
" <td> 44.95</td>\n",
" <td> 0.00</td>\n",
" <td> 44.65</td>\n",
" <td> 45.15</td>\n",
" <td> 0</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td> 48.57</td>\n",
" <td> AAPL140621C00048570</td>\n",
" <td> 43.20</td>\n",
" <td> 0.65</td>\n",
" <td> 43.30</td>\n",
" <td> 43.75</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td> 49.29</td>\n",
" <td> AAPL140621C00049290</td>\n",
" <td> 40.94</td>\n",
" <td> 0.00</td>\n",
" <td> 42.50</td>\n",
" <td> 43.00</td>\n",
" <td> 0</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 56,
"text": [
" Strike Symbol Last Chg Bid Ask Vol Open Int\n",
"0 37.86 AAPL140621C00037860 57.40 0.00 54.05 54.50 10 10\n",
"1 39.29 AAPL140621C00039290 52.65 2.65 52.40 53.00 74 9,845\n",
"2 40.00 AAPL140621C00040000 51.04 0.00 51.90 52.30 0 7\n",
"3 40.71 AAPL140621C00040710 45.65 0.00 51.15 51.60 0 9,807\n",
"4 42.86 AAPL140621C00042860 47.07 0.00 49.05 49.50 0 56\n",
"5 44.29 AAPL140621C00044290 47.65 5.58 47.65 48.00 6 1,750\n",
"6 45.71 AAPL140621C00045710 43.97 0.00 46.20 46.60 0 1,743\n",
"7 47.14 AAPL140621C00047140 44.95 0.00 44.65 45.15 0 14\n",
"8 48.57 AAPL140621C00048570 43.20 0.65 43.30 43.75 1 1\n",
"9 49.29 AAPL140621C00049290 40.94 0.00 42.50 43.00 0 7"
]
}
],
"prompt_number": 56
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Parsing XML with lxml.objectify"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from lxml import objectify"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"path = 'Performance_MNR.xml'\n",
"parsed = objectify.parse(open(path))\n",
"root = parsed.getroot()\n",
"data = []\n",
"skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ','DESIRED_CHANGE','DECIMAL_PLACES']\n",
"\n",
"for elt in root.INDICATOR:\n",
" el_data={}\n",
" for child in elt.getchildren():\n",
" if child.tag in skip_fields:\n",
" continue\n",
" el_data[child.tag] = child.pyval\n",
" data.append(el_data)\n",
"\n",
"perf = DataFrame(data)\n",
"perf # the output in the book says, \"Empty DataFrame\""
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AGENCY_NAME</th>\n",
" <th>CATEGORY</th>\n",
" <th>DESCRIPTION</th>\n",
" <th>FREQUENCY</th>\n",
" <th>INDICATOR_NAME</th>\n",
" <th>INDICATOR_UNIT</th>\n",
" <th>MONTHLY_ACTUAL</th>\n",
" <th>MONTHLY_TARGET</th>\n",
" <th>PERIOD_MONTH</th>\n",
" <th>PERIOD_YEAR</th>\n",
" <th>YTD_ACTUAL</th>\n",
" <th>YTD_TARGET</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.9</td>\n",
" <td> 95</td>\n",
" <td> 1</td>\n",
" <td> 2008</td>\n",
" <td> 96.9</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95</td>\n",
" <td> 95</td>\n",
" <td> 2</td>\n",
" <td> 2008</td>\n",
" <td> 96</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.9</td>\n",
" <td> 95</td>\n",
" <td> 3</td>\n",
" <td> 2008</td>\n",
" <td> 96.3</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 98.3</td>\n",
" <td> 95</td>\n",
" <td> 4</td>\n",
" <td> 2008</td>\n",
" <td> 96.8</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95.8</td>\n",
" <td> 95</td>\n",
" <td> 5</td>\n",
" <td> 2008</td>\n",
" <td> 96.6</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 94.4</td>\n",
" <td> 95</td>\n",
" <td> 6</td>\n",
" <td> 2008</td>\n",
" <td> 96.2</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96</td>\n",
" <td> 95</td>\n",
" <td> 7</td>\n",
" <td> 2008</td>\n",
" <td> 96.2</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.4</td>\n",
" <td> 95</td>\n",
" <td> 8</td>\n",
" <td> 2008</td>\n",
" <td> 96.2</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 93.7</td>\n",
" <td> 95</td>\n",
" <td> 9</td>\n",
" <td> 2008</td>\n",
" <td> 95.9</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.4</td>\n",
" <td> 95</td>\n",
" <td> 10</td>\n",
" <td> 2008</td>\n",
" <td> 96</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.9</td>\n",
" <td> 95</td>\n",
" <td> 11</td>\n",
" <td> 2008</td>\n",
" <td> 96.1</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95.1</td>\n",
" <td> 95</td>\n",
" <td> 12</td>\n",
" <td> 2008</td>\n",
" <td> 96</td>\n",
" <td> 95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 92.6</td>\n",
" <td> 96.2</td>\n",
" <td> 1</td>\n",
" <td> 2009</td>\n",
" <td> 92.6</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.8</td>\n",
" <td> 96.2</td>\n",
" <td> 2</td>\n",
" <td> 2009</td>\n",
" <td> 94.6</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.9</td>\n",
" <td> 96.2</td>\n",
" <td> 3</td>\n",
" <td> 2009</td>\n",
" <td> 95.4</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 97.1</td>\n",
" <td> 96.2</td>\n",
" <td> 4</td>\n",
" <td> 2009</td>\n",
" <td> 95.9</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 97.8</td>\n",
" <td> 96.2</td>\n",
" <td> 5</td>\n",
" <td> 2009</td>\n",
" <td> 96.2</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 97.3</td>\n",
" <td> 96.2</td>\n",
" <td> 6</td>\n",
" <td> 2009</td>\n",
" <td> 96.4</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.7</td>\n",
" <td> 96.2</td>\n",
" <td> 7</td>\n",
" <td> 2009</td>\n",
" <td> 96.5</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95.7</td>\n",
" <td> 96.2</td>\n",
" <td> 8</td>\n",
" <td> 2009</td>\n",
" <td> 96.4</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.1</td>\n",
" <td> 96.2</td>\n",
" <td> 9</td>\n",
" <td> 2009</td>\n",
" <td> 96.3</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 94.8</td>\n",
" <td> 96.2</td>\n",
" <td> 10</td>\n",
" <td> 2009</td>\n",
" <td> 96.2</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95.7</td>\n",
" <td> 96.2</td>\n",
" <td> 11</td>\n",
" <td> 2009</td>\n",
" <td> 96.1</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 95</td>\n",
" <td> 96.2</td>\n",
" <td> 12</td>\n",
" <td> 2009</td>\n",
" <td> 96</td>\n",
" <td> 96.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 98</td>\n",
" <td> 96.3</td>\n",
" <td> 1</td>\n",
" <td> 2010</td>\n",
" <td> 98</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 93</td>\n",
" <td> 96.3</td>\n",
" <td> 2</td>\n",
" <td> 2010</td>\n",
" <td> 95.6</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 96.9</td>\n",
" <td> 96.3</td>\n",
" <td> 3</td>\n",
" <td> 2010</td>\n",
" <td> 96.1</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 98.1</td>\n",
" <td> 96.3</td>\n",
" <td> 4</td>\n",
" <td> 2010</td>\n",
" <td> 96.6</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 97.6</td>\n",
" <td> 96.3</td>\n",
" <td> 5</td>\n",
" <td> 2010</td>\n",
" <td> 96.8</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29 </th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of commuter trains that arrive at thei...</td>\n",
" <td> M</td>\n",
" <td> On-Time Performance (West of Hudson)</td>\n",
" <td> %</td>\n",
" <td> 97.4</td>\n",
" <td> 96.3</td>\n",
" <td> 6</td>\n",
" <td> 2010</td>\n",
" <td> 96.9</td>\n",
" <td> 96.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>618</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 94</td>\n",
" <td> </td>\n",
" <td> 7</td>\n",
" <td> 2009</td>\n",
" <td> 95.14</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>619</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 97</td>\n",
" <td> </td>\n",
" <td> 8</td>\n",
" <td> 2009</td>\n",
" <td> 95.38</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>620</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 98.3</td>\n",
" <td> </td>\n",
" <td> 9</td>\n",
" <td> 2009</td>\n",
" <td> 95.7</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>621</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 98.7</td>\n",
" <td> </td>\n",
" <td> 10</td>\n",
" <td> 2009</td>\n",
" <td> 96</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>622</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 98.1</td>\n",
" <td> </td>\n",
" <td> 11</td>\n",
" <td> 2009</td>\n",
" <td> 96.21</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>623</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> </td>\n",
" <td> 12</td>\n",
" <td> 2009</td>\n",
" <td> 96.5</td>\n",
" <td> </td>\n",
" </tr>\n",
" <tr>\n",
" <th>624</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 97.95</td>\n",
" <td> 97</td>\n",
" <td> 1</td>\n",
" <td> 2010</td>\n",
" <td> 97.95</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>625</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 2</td>\n",
" <td> 2010</td>\n",
" <td> 98.92</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>626</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 3</td>\n",
" <td> 2010</td>\n",
" <td> 99.29</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>627</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 4</td>\n",
" <td> 2010</td>\n",
" <td> 99.47</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>628</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 5</td>\n",
" <td> 2010</td>\n",
" <td> 99.58</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>629</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 91.21</td>\n",
" <td> 97</td>\n",
" <td> 6</td>\n",
" <td> 2010</td>\n",
" <td> 98.19</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>630</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 7</td>\n",
" <td> 2010</td>\n",
" <td> 98.46</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>631</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 8</td>\n",
" <td> 2010</td>\n",
" <td> 98.69</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>632</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 95.2</td>\n",
" <td> 97</td>\n",
" <td> 9</td>\n",
" <td> 2010</td>\n",
" <td> 98.3</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>633</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 90.91</td>\n",
" <td> 97</td>\n",
" <td> 10</td>\n",
" <td> 2010</td>\n",
" <td> 97.55</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>634</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 96.67</td>\n",
" <td> 97</td>\n",
" <td> 11</td>\n",
" <td> 2010</td>\n",
" <td> 97.47</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>635</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 90.03</td>\n",
" <td> 97</td>\n",
" <td> 12</td>\n",
" <td> 2010</td>\n",
" <td> 96.84</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>636</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 1</td>\n",
" <td> 2011</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>637</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" <td> 2</td>\n",
" <td> 2011</td>\n",
" <td> 100</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>638</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 97.07</td>\n",
" <td> 97</td>\n",
" <td> 3</td>\n",
" <td> 2011</td>\n",
" <td> 98.86</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>639</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 98.18</td>\n",
" <td> 97</td>\n",
" <td> 4</td>\n",
" <td> 2011</td>\n",
" <td> 98.76</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>640</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> 79.18</td>\n",
" <td> 97</td>\n",
" <td> 5</td>\n",
" <td> 2011</td>\n",
" <td> 90.91</td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>641</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 6</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 7</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>643</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 8</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>644</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 9</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>645</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 10</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>646</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 11</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>647</th>\n",
" <td> Metro-North Railroad</td>\n",
" <td> Service Indicators</td>\n",
" <td> Percent of the time that escalators are operat...</td>\n",
" <td> M</td>\n",
" <td> Escalator Availability</td>\n",
" <td> %</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" <td> 12</td>\n",
" <td> 2011</td>\n",
" <td> </td>\n",
" <td> 97</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>648 rows \u00d7 12 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 58,
"text": [
" AGENCY_NAME CATEGORY \\\n",
"0 Metro-North Railroad Service Indicators \n",
"1 Metro-North Railroad Service Indicators \n",
"2 Metro-North Railroad Service Indicators \n",
"3 Metro-North Railroad Service Indicators \n",
"4 Metro-North Railroad Service Indicators \n",
"5 Metro-North Railroad Service Indicators \n",
"6 Metro-North Railroad Service Indicators \n",
"7 Metro-North Railroad Service Indicators \n",
"8 Metro-North Railroad Service Indicators \n",
"9 Metro-North Railroad Service Indicators \n",
"10 Metro-North Railroad Service Indicators \n",
"11 Metro-North Railroad Service Indicators \n",
"12 Metro-North Railroad Service Indicators \n",
"13 Metro-North Railroad Service Indicators \n",
"14 Metro-North Railroad Service Indicators \n",
"15 Metro-North Railroad Service Indicators \n",
"16 Metro-North Railroad Service Indicators \n",
"17 Metro-North Railroad Service Indicators \n",
"18 Metro-North Railroad Service Indicators \n",
"19 Metro-North Railroad Service Indicators \n",
"20 Metro-North Railroad Service Indicators \n",
"21 Metro-North Railroad Service Indicators \n",
"22 Metro-North Railroad Service Indicators \n",
"23 Metro-North Railroad Service Indicators \n",
"24 Metro-North Railroad Service Indicators \n",
"25 Metro-North Railroad Service Indicators \n",
"26 Metro-North Railroad Service Indicators \n",
"27 Metro-North Railroad Service Indicators \n",
"28 Metro-North Railroad Service Indicators \n",
"29 Metro-North Railroad Service Indicators \n",
".. ... ... \n",
"618 Metro-North Railroad Service Indicators \n",
"619 Metro-North Railroad Service Indicators \n",
"620 Metro-North Railroad Service Indicators \n",
"621 Metro-North Railroad Service Indicators \n",
"622 Metro-North Railroad Service Indicators \n",
"623 Metro-North Railroad Service Indicators \n",
"624 Metro-North Railroad Service Indicators \n",
"625 Metro-North Railroad Service Indicators \n",
"626 Metro-North Railroad Service Indicators \n",
"627 Metro-North Railroad Service Indicators \n",
"628 Metro-North Railroad Service Indicators \n",
"629 Metro-North Railroad Service Indicators \n",
"630 Metro-North Railroad Service Indicators \n",
"631 Metro-North Railroad Service Indicators \n",
"632 Metro-North Railroad Service Indicators \n",
"633 Metro-North Railroad Service Indicators \n",
"634 Metro-North Railroad Service Indicators \n",
"635 Metro-North Railroad Service Indicators \n",
"636 Metro-North Railroad Service Indicators \n",
"637 Metro-North Railroad Service Indicators \n",
"638 Metro-North Railroad Service Indicators \n",
"639 Metro-North Railroad Service Indicators \n",
"640 Metro-North Railroad Service Indicators \n",
"641 Metro-North Railroad Service Indicators \n",
"642 Metro-North Railroad Service Indicators \n",
"643 Metro-North Railroad Service Indicators \n",
"644 Metro-North Railroad Service Indicators \n",
"645 Metro-North Railroad Service Indicators \n",
"646 Metro-North Railroad Service Indicators \n",
"647 Metro-North Railroad Service Indicators \n",
"\n",
" DESCRIPTION FREQUENCY \\\n",
"0 Percent of commuter trains that arrive at thei... M \n",
"1 Percent of commuter trains that arrive at thei... M \n",
"2 Percent of commuter trains that arrive at thei... M \n",
"3 Percent of commuter trains that arrive at thei... M \n",
"4 Percent of commuter trains that arrive at thei... M \n",
"5 Percent of commuter trains that arrive at thei... M \n",
"6 Percent of commuter trains that arrive at thei... M \n",
"7 Percent of commuter trains that arrive at thei... M \n",
"8 Percent of commuter trains that arrive at thei... M \n",
"9 Percent of commuter trains that arrive at thei... M \n",
"10 Percent of commuter trains that arrive at thei... M \n",
"11 Percent of commuter trains that arrive at thei... M \n",
"12 Percent of commuter trains that arrive at thei... M \n",
"13 Percent of commuter trains that arrive at thei... M \n",
"14 Percent of commuter trains that arrive at thei... M \n",
"15 Percent of commuter trains that arrive at thei... M \n",
"16 Percent of commuter trains that arrive at thei... M \n",
"17 Percent of commuter trains that arrive at thei... M \n",
"18 Percent of commuter trains that arrive at thei... M \n",
"19 Percent of commuter trains that arrive at thei... M \n",
"20 Percent of commuter trains that arrive at thei... M \n",
"21 Percent of commuter trains that arrive at thei... M \n",
"22 Percent of commuter trains that arrive at thei... M \n",
"23 Percent of commuter trains that arrive at thei... M \n",
"24 Percent of commuter trains that arrive at thei... M \n",
"25 Percent of commuter trains that arrive at thei... M \n",
"26 Percent of commuter trains that arrive at thei... M \n",
"27 Percent of commuter trains that arrive at thei... M \n",
"28 Percent of commuter trains that arrive at thei... M \n",
"29 Percent of commuter trains that arrive at thei... M \n",
".. ... ... \n",
"618 Percent of the time that escalators are operat... M \n",
"619 Percent of the time that escalators are operat... M \n",
"620 Percent of the time that escalators are operat... M \n",
"621 Percent of the time that escalators are operat... M \n",
"622 Percent of the time that escalators are operat... M \n",
"623 Percent of the time that escalators are operat... M \n",
"624 Percent of the time that escalators are operat... M \n",
"625 Percent of the time that escalators are operat... M \n",
"626 Percent of the time that escalators are operat... M \n",
"627 Percent of the time that escalators are operat... M \n",
"628 Percent of the time that escalators are operat... M \n",
"629 Percent of the time that escalators are operat... M \n",
"630 Percent of the time that escalators are operat... M \n",
"631 Percent of the time that escalators are operat... M \n",
"632 Percent of the time that escalators are operat... M \n",
"633 Percent of the time that escalators are operat... M \n",
"634 Percent of the time that escalators are operat... M \n",
"635 Percent of the time that escalators are operat... M \n",
"636 Percent of the time that escalators are operat... M \n",
"637 Percent of the time that escalators are operat... M \n",
"638 Percent of the time that escalators are operat... M \n",
"639 Percent of the time that escalators are operat... M \n",
"640 Percent of the time that escalators are operat... M \n",
"641 Percent of the time that escalators are operat... M \n",
"642 Percent of the time that escalators are operat... M \n",
"643 Percent of the time that escalators are operat... M \n",
"644 Percent of the time that escalators are operat... M \n",
"645 Percent of the time that escalators are operat... M \n",
"646 Percent of the time that escalators are operat... M \n",
"647 Percent of the time that escalators are operat... M \n",
"\n",
" INDICATOR_NAME INDICATOR_UNIT MONTHLY_ACTUAL \\\n",
"0 On-Time Performance (West of Hudson) % 96.9 \n",
"1 On-Time Performance (West of Hudson) % 95 \n",
"2 On-Time Performance (West of Hudson) % 96.9 \n",
"3 On-Time Performance (West of Hudson) % 98.3 \n",
"4 On-Time Performance (West of Hudson) % 95.8 \n",
"5 On-Time Performance (West of Hudson) % 94.4 \n",
"6 On-Time Performance (West of Hudson) % 96 \n",
"7 On-Time Performance (West of Hudson) % 96.4 \n",
"8 On-Time Performance (West of Hudson) % 93.7 \n",
"9 On-Time Performance (West of Hudson) % 96.4 \n",
"10 On-Time Performance (West of Hudson) % 96.9 \n",
"11 On-Time Performance (West of Hudson) % 95.1 \n",
"12 On-Time Performance (West of Hudson) % 92.6 \n",
"13 On-Time Performance (West of Hudson) % 96.8 \n",
"14 On-Time Performance (West of Hudson) % 96.9 \n",
"15 On-Time Performance (West of Hudson) % 97.1 \n",
"16 On-Time Performance (West of Hudson) % 97.8 \n",
"17 On-Time Performance (West of Hudson) % 97.3 \n",
"18 On-Time Performance (West of Hudson) % 96.7 \n",
"19 On-Time Performance (West of Hudson) % 95.7 \n",
"20 On-Time Performance (West of Hudson) % 96.1 \n",
"21 On-Time Performance (West of Hudson) % 94.8 \n",
"22 On-Time Performance (West of Hudson) % 95.7 \n",
"23 On-Time Performance (West of Hudson) % 95 \n",
"24 On-Time Performance (West of Hudson) % 98 \n",
"25 On-Time Performance (West of Hudson) % 93 \n",
"26 On-Time Performance (West of Hudson) % 96.9 \n",
"27 On-Time Performance (West of Hudson) % 98.1 \n",
"28 On-Time Performance (West of Hudson) % 97.6 \n",
"29 On-Time Performance (West of Hudson) % 97.4 \n",
".. ... ... ... \n",
"618 Escalator Availability % 94 \n",
"619 Escalator Availability % 97 \n",
"620 Escalator Availability % 98.3 \n",
"621 Escalator Availability % 98.7 \n",
"622 Escalator Availability % 98.1 \n",
"623 Escalator Availability % 100 \n",
"624 Escalator Availability % 97.95 \n",
"625 Escalator Availability % 100 \n",
"626 Escalator Availability % 100 \n",
"627 Escalator Availability % 100 \n",
"628 Escalator Availability % 100 \n",
"629 Escalator Availability % 91.21 \n",
"630 Escalator Availability % 100 \n",
"631 Escalator Availability % 100 \n",
"632 Escalator Availability % 95.2 \n",
"633 Escalator Availability % 90.91 \n",
"634 Escalator Availability % 96.67 \n",
"635 Escalator Availability % 90.03 \n",
"636 Escalator Availability % 100 \n",
"637 Escalator Availability % 100 \n",
"638 Escalator Availability % 97.07 \n",
"639 Escalator Availability % 98.18 \n",
"640 Escalator Availability % 79.18 \n",
"641 Escalator Availability % \n",
"642 Escalator Availability % \n",
"643 Escalator Availability % \n",
"644 Escalator Availability % \n",
"645 Escalator Availability % \n",
"646 Escalator Availability % \n",
"647 Escalator Availability % \n",
"\n",
" MONTHLY_TARGET PERIOD_MONTH PERIOD_YEAR YTD_ACTUAL YTD_TARGET \n",
"0 95 1 2008 96.9 95 \n",
"1 95 2 2008 96 95 \n",
"2 95 3 2008 96.3 95 \n",
"3 95 4 2008 96.8 95 \n",
"4 95 5 2008 96.6 95 \n",
"5 95 6 2008 96.2 95 \n",
"6 95 7 2008 96.2 95 \n",
"7 95 8 2008 96.2 95 \n",
"8 95 9 2008 95.9 95 \n",
"9 95 10 2008 96 95 \n",
"10 95 11 2008 96.1 95 \n",
"11 95 12 2008 96 95 \n",
"12 96.2 1 2009 92.6 96.2 \n",
"13 96.2 2 2009 94.6 96.2 \n",
"14 96.2 3 2009 95.4 96.2 \n",
"15 96.2 4 2009 95.9 96.2 \n",
"16 96.2 5 2009 96.2 96.2 \n",
"17 96.2 6 2009 96.4 96.2 \n",
"18 96.2 7 2009 96.5 96.2 \n",
"19 96.2 8 2009 96.4 96.2 \n",
"20 96.2 9 2009 96.3 96.2 \n",
"21 96.2 10 2009 96.2 96.2 \n",
"22 96.2 11 2009 96.1 96.2 \n",
"23 96.2 12 2009 96 96.2 \n",
"24 96.3 1 2010 98 96.3 \n",
"25 96.3 2 2010 95.6 96.3 \n",
"26 96.3 3 2010 96.1 96.3 \n",
"27 96.3 4 2010 96.6 96.3 \n",
"28 96.3 5 2010 96.8 96.3 \n",
"29 96.3 6 2010 96.9 96.3 \n",
".. ... ... ... ... ... \n",
"618 7 2009 95.14 \n",
"619 8 2009 95.38 \n",
"620 9 2009 95.7 \n",
"621 10 2009 96 \n",
"622 11 2009 96.21 \n",
"623 12 2009 96.5 \n",
"624 97 1 2010 97.95 97 \n",
"625 97 2 2010 98.92 97 \n",
"626 97 3 2010 99.29 97 \n",
"627 97 4 2010 99.47 97 \n",
"628 97 5 2010 99.58 97 \n",
"629 97 6 2010 98.19 97 \n",
"630 97 7 2010 98.46 97 \n",
"631 97 8 2010 98.69 97 \n",
"632 97 9 2010 98.3 97 \n",
"633 97 10 2010 97.55 97 \n",
"634 97 11 2010 97.47 97 \n",
"635 97 12 2010 96.84 97 \n",
"636 97 1 2011 100 97 \n",
"637 97 2 2011 100 97 \n",
"638 97 3 2011 98.86 97 \n",
"639 97 4 2011 98.76 97 \n",
"640 97 5 2011 90.91 97 \n",
"641 97 6 2011 97 \n",
"642 97 7 2011 97 \n",
"643 97 8 2011 97 \n",
"644 97 9 2011 97 \n",
"645 97 10 2011 97 \n",
"646 97 11 2011 97 \n",
"647 97 12 2011 97 \n",
"\n",
"[648 rows x 12 columns]"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# StringIO lives in its own module on Python 2 and in io on Python 3;\n",
"# fall back so the cell runs on either interpreter (Python 2 is unchanged).\n",
"try:\n",
"    from StringIO import StringIO\n",
"except ImportError:\n",
"    from io import StringIO\n",
"\n",
"# Parse a single <a> element from an in-memory file-like object and keep its\n",
"# root element; the next cells read its href attribute and its text.\n",
"tag = '<a href=\"http://www.google.com\">Google</a>'\n",
"root = objectify.parse(StringIO(tag)).getroot()\n",
"root"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 59,
"text": [
"<Element a at 0x10aa815a8>"
]
}
],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"root.get('href')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 60,
"text": [
"'http://www.google.com'"
]
}
],
"prompt_number": 60
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"root.text"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 61,
"text": [
"'Google'"
]
}
],
"prompt_number": 61
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Binary Data Formats"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"frame = pd.read_csv('ch06/ex1.csv')\n",
"frame"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 62,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Round-trip the frame through pandas' pickle helpers using one shared path.\n",
"pickle_path = 'ch06/frame_pickle'\n",
"frame.to_pickle(pickle_path)  # write the DataFrame to disk as a pickle\n",
"pd.read_pickle(pickle_path)   # load it back; the result is the cell's output"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 65,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 65
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Using HDF5 Format"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Open the HDF5 store, write the whole frame and one of its columns under\n",
"# separate keys, then display the store's summary.\n",
"store = pd.HDFStore('mydata.h5')\n",
"store.put('obj1', frame)\n",
"store.put('obj1_col', frame['a'])\n",
"store"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 70,
"text": [
"<class 'pandas.io.pytables.HDFStore'>\n",
"File path: mydata.h5\n",
"/obj1 frame (shape->[3,5])\n",
"/obj1_col series (shape->[3]) "
]
}
],
"prompt_number": 70
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read the stored frame back, then close the store: no later cell uses it,\n",
"# and leaving it open keeps a file handle on mydata.h5 for the rest of the\n",
"# session. Re-run the cell that creates `store` before re-running this one.\n",
"obj1 = store['obj1']\n",
"store.close()\n",
"obj1"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> hello</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 5</td>\n",
" <td> 6</td>\n",
" <td> 7</td>\n",
" <td> 8</td>\n",
" <td> world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 9</td>\n",
" <td> 10</td>\n",
" <td> 11</td>\n",
" <td> 12</td>\n",
" <td> foo</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 71,
"text": [
" a b c d message\n",
"0 1 2 3 4 hello\n",
"1 5 6 7 8 world\n",
"2 9 10 11 12 foo"
]
}
],
"prompt_number": 71
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Reading Microsoft Excel Files"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Disabled example, kept as real comments so the cell no longer evaluates to\n",
"# (and displays) a bare string literal.\n",
"# NOTE(review): presumably disabled because data.xls is not available here -\n",
"# confirm before re-enabling.\n",
"# xls_file = pd.ExcelFile('data.xls')  # reads in an Excel spreadsheet\n",
"# table = xls_file.parse('Sheet1')      # creates a DataFrame from that sheet"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 73
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Interacting with HTML and Web APIs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"http://docs.python-requests.org"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import requests\n",
"\n",
"# NOTE: this unauthenticated search endpoint answered 401 when the notebook\n",
"# was last run (see the stored output of this cell).\n",
"url = 'http://search.twitter.com/search.json?q=python%20pandas'\n",
"\n",
"# Bound the network call: without a timeout, requests can wait indefinitely\n",
"# on an unresponsive host and stall the whole notebook run (10 seconds here).\n",
"resp = requests.get(url, timeout=10)\n",
"resp"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 81,
"text": [
"<Response [401]>"
]
}
],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json\n",
"data = json.loads(resp.text)\n",
"data.keys()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 82,
"text": [
"[u'errors']"
]
}
],
"prompt_number": 82
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment