Last active
December 18, 2015 07:19
-
-
Save mattiasostmar/5745381 to your computer and use it in GitHub Desktop.
Learning Pandas within iPython Notebook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "CSV read with Pandas" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import os\nimport zipfile\nfrom contextlib import closing\n\ntry:\n    from urllib.request import urlopen  # Python 3\nexcept ImportError:\n    from urllib2 import urlopen  # Python 2\n\nurl = 'http://ipython.rossant.net/'\nfilename = 'cities.zip'\nfolder = 'data'\n\n# Create the target folder only if it is missing, so re-running the cell is\n# silent and portable (the shell call `!mkdir $folder` complained on re-runs).\nif not os.path.isdir(folder):\n    os.makedirs(folder)\n\n# closing() releases the HTTP response even if the write fails; the Python 2\n# response object is not a context manager on its own.\nwith closing(urlopen(url + filename)) as downloaded:\n    with open(filename, 'wb') as f:\n        f.write(downloaded.read())\n\n# Named `archive` rather than `zip` so the builtin zip() is not shadowed.\nwith zipfile.ZipFile(filename) as archive:\n    archive.extractall(folder)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import pandas as pd\n\n# Load the extracted world-cities table into a DataFrame.\n# The accented city names are not valid UTF-8 (they rendered as the U+FFFD\n# replacement character), and the file appears to be Latin-1 encoded, so the\n# encoding is stated explicitly instead of relying on the default.\nfilename = 'data/worldcitiespop.txt'\ndata = pd.read_csv(filename, encoding='latin-1')", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "type(data)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": "pandas.core.frame.DataFrame" | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Table dimensions as (rows, columns), followed by the column labels.\ndata.shape, data.columns", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": "((3173958, 7),\n Index([Country, City, AccentCity, Region, Population, Latitude, Longitude], dtype=object))" | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Preview the first five rows (head() defaults to n=5).\ndata.head()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td> ad</td>\n <td> aixas</td>\n <td> Aix\ufffds</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.483333</td>\n <td> 1.466667</td>\n </tr>\n <tr>\n <th>1</th>\n <td> ad</td>\n <td> aixirivali</td>\n <td> Aixirivali</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>2</th>\n <td> ad</td>\n <td> aixirivall</td>\n <td> Aixirivall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>3</th>\n <td> ad</td>\n <td> aixirvall</td>\n <td> Aixirvall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>4</th>\n <td> ad</td>\n <td> aixovall</td>\n <td> Aixovall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.483333</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": " Country City AccentCity Region Population Latitude Longitude\n0 ad aixas Aix\ufffds 6 NaN 42.483333 1.466667\n1 ad aixirivali Aixirivali 6 NaN 42.466667 1.500000\n2 ad aixirivall Aixirivall 6 NaN 42.466667 1.500000\n3 ad aixirvall Aixirvall 6 NaN 42.466667 1.500000\n4 ad aixovall Aixovall 6 NaN 42.466667 1.483333" | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# First four rows, selected explicitly by position.\ndata.iloc[:4]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td> ad</td>\n <td> aixas</td>\n <td> Aix\ufffds</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.483333</td>\n <td> 1.466667</td>\n </tr>\n <tr>\n <th>1</th>\n <td> ad</td>\n <td> aixirivali</td>\n <td> Aixirivali</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>2</th>\n <td> ad</td>\n <td> aixirivall</td>\n <td> Aixirivall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>3</th>\n <td> ad</td>\n <td> aixirvall</td>\n <td> Aixirvall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": " Country City AccentCity Region Population Latitude Longitude\n0 ad aixas Aix\ufffds 6 NaN 42.483333 1.466667\n1 ad aixirivali Aixirivali 6 NaN 42.466667 1.500000\n2 ad aixirivall Aixirivall 6 NaN 42.466667 1.500000\n3 ad aixirvall Aixirvall 6 NaN 42.466667 1.500000" | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# City name stored under row label 24599.\ndata.loc[24599, 'City']", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": "'garm ab'" | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Every row whose accented city name is exactly 'Stockholm'.\ndata.loc[data['AccentCity'] == 'Stockholm']", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2617638</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 07</td>\n <td> NaN</td>\n <td> 61.883056</td>\n <td> 13.750000</td>\n </tr>\n <tr>\n <th>2617639</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 26</td>\n <td> 1253309</td>\n <td> 59.333333</td>\n <td> 18.050000</td>\n </tr>\n <tr>\n <th>2617640</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 27</td>\n <td> NaN</td>\n <td> 56.133333</td>\n <td> 13.416667</td>\n </tr>\n <tr>\n <th>2957359</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> ME</td>\n <td> NaN</td>\n <td> 47.042222</td>\n <td>-68.140000</td>\n </tr>\n <tr>\n <th>2972262</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> MN</td>\n <td> NaN</td>\n <td> 45.036389</td>\n <td>-94.220833</td>\n </tr>\n <tr>\n <th>2985302</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> NJ</td>\n <td> NaN</td>\n <td> 41.089444</td>\n <td>-74.517500</td>\n </tr>\n <tr>\n <th>3020588</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> SD</td>\n <td> NaN</td>\n <td> 45.099722</td>\n <td>-96.801667</td>\n </tr>\n <tr>\n <th>3048843</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> WI</td>\n <td> NaN</td>\n <td> 44.483333</td>\n <td>-92.261944</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": " Country City AccentCity Region Population Latitude Longitude\n2617638 se stockholm Stockholm 07 NaN 61.883056 13.750000\n2617639 se stockholm Stockholm 26 1253309 59.333333 18.050000\n2617640 se stockholm Stockholm 27 NaN 56.133333 13.416667\n2957359 us stockholm Stockholm ME NaN 47.042222 -68.140000\n2972262 us stockholm Stockholm MN NaN 45.036389 -94.220833\n2985302 us stockholm Stockholm NJ NaN 41.089444 -74.517500\n3020588 us stockholm Stockholm SD NaN 45.099722 -96.801667\n3048843 us stockholm Stockholm WI NaN 44.483333 -92.261944" | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Row label of the Swedish capital, taken from the 'Stockholm' filter output\n# (the only match with a population value).\nsthlm = 2617639\n\n# .loc is the label-based indexer; the older .ix indexer is deprecated and\n# no longer exists in current pandas.\ndata.loc[sthlm]", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 15, | |
"text": "Country se\nCity stockholm\nAccentCity Stockholm\nRegion 26\nPopulation 1253309\nLatitude 59.33333\nLongitude 18.05\nName: 2617639, dtype: object" | |
} | |
], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment