Skip to content

Instantly share code, notes, and snippets.

@mattiasostmar
Last active December 18, 2015 07:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattiasostmar/5745381 to your computer and use it in GitHub Desktop.
Save mattiasostmar/5745381 to your computer and use it in GitHub Desktop.
Learning pandas within the IPython Notebook
{
"metadata": {
"name": "CSV read with Pandas"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "import os\nimport zipfile\nfrom contextlib import closing\n\ntry:\n    from urllib.request import urlopen  # Python 3\nexcept ImportError:\n    from urllib2 import urlopen  # Python 2\n\nurl = 'http://ipython.rossant.net/'\nfilename = 'cities.zip'\nfolder = 'data'\n\n# Create the target folder only when it is missing, so re-running the cell\n# no longer fails with 'mkdir: data: File exists'.\nif not os.path.isdir(folder):\n    os.makedirs(folder)\n\n# Download the archive once. Writing to a temporary name first means an\n# interrupted download is never mistaken for a complete archive.\nif not os.path.exists(filename):\n    partial = filename + '.part'\n    with closing(urlopen(url + filename)) as downloaded:\n        with open(partial, 'wb') as f:\n            f.write(downloaded.read())\n    os.rename(partial, filename)\n\n# Named `archive` rather than `zip` to avoid shadowing the builtin.\nwith zipfile.ZipFile(filename) as archive:\n    archive.extractall(folder)",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "import pandas as pd\n\nfilename = 'data/worldcitiespop.txt'\n# The file is ISO-8859-1 (Latin-1) encoded; without an explicit encoding the\n# accented city names cannot be decoded and show up as replacement characters.\n# Region mixes numeric and alphabetic codes ('07', 'ME'), so it is read as\n# text instead of being partly converted to numbers.\ndata = pd.read_csv(filename, encoding='latin-1', dtype={'Region': object})",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Check which kind of object read_csv produced.\ndata.__class__",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 3,
"text": "pandas.core.frame.DataFrame"
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Dimensions of the table together with its column labels.\n(data.shape, data.columns)",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 9,
"text": "((3173958, 7),\n Index([Country, City, AccentCity, Region, Population, Latitude, Longitude], dtype=object))"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Preview the first rows (head() shows five by default).\ndata.head()",
"language": "python",
"metadata": {},
"outputs": [
{
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td> ad</td>\n <td> aixas</td>\n <td> Aix\ufffds</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.483333</td>\n <td> 1.466667</td>\n </tr>\n <tr>\n <th>1</th>\n <td> ad</td>\n <td> aixirivali</td>\n <td> Aixirivali</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>2</th>\n <td> ad</td>\n <td> aixirivall</td>\n <td> Aixirivall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>3</th>\n <td> ad</td>\n <td> aixirvall</td>\n <td> Aixirvall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>4</th>\n <td> ad</td>\n <td> aixovall</td>\n <td> Aixovall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.483333</td>\n </tr>\n </tbody>\n</table>\n</div>",
"output_type": "pyout",
"prompt_number": 4,
"text": " Country City AccentCity Region Population Latitude Longitude\n0 ad aixas Aix\ufffds 6 NaN 42.483333 1.466667\n1 ad aixirivali Aixirivali 6 NaN 42.466667 1.500000\n2 ad aixirivall Aixirivall 6 NaN 42.466667 1.500000\n3 ad aixirvall Aixirvall 6 NaN 42.466667 1.500000\n4 ad aixovall Aixovall 6 NaN 42.466667 1.483333"
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": "# First four rows, selected by position.\ndata.iloc[:4]",
"language": "python",
"metadata": {},
"outputs": [
{
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td> ad</td>\n <td> aixas</td>\n <td> Aix\ufffds</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.483333</td>\n <td> 1.466667</td>\n </tr>\n <tr>\n <th>1</th>\n <td> ad</td>\n <td> aixirivali</td>\n <td> Aixirivali</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>2</th>\n <td> ad</td>\n <td> aixirivall</td>\n <td> Aixirivall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n <tr>\n <th>3</th>\n <td> ad</td>\n <td> aixirvall</td>\n <td> Aixirvall</td>\n <td> 6</td>\n <td>NaN</td>\n <td> 42.466667</td>\n <td> 1.500000</td>\n </tr>\n </tbody>\n</table>\n</div>",
"output_type": "pyout",
"prompt_number": 5,
"text": " Country City AccentCity Region Population Latitude Longitude\n0 ad aixas Aix\ufffds 6 NaN 42.483333 1.466667\n1 ad aixirivali Aixirivali 6 NaN 42.466667 1.500000\n2 ad aixirivall Aixirivall 6 NaN 42.466667 1.500000\n3 ad aixirvall Aixirvall 6 NaN 42.466667 1.500000"
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "# City name stored in the row labelled 24599.\ndata.loc[24599, 'City']",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 9,
"text": "'garm ab'"
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Boolean mask marking the rows whose accented name is exactly 'Stockholm'.\nis_stockholm = data['AccentCity'] == 'Stockholm'\ndata[is_stockholm]",
"language": "python",
"metadata": {},
"outputs": [
{
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2617638</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 07</td>\n <td> NaN</td>\n <td> 61.883056</td>\n <td> 13.750000</td>\n </tr>\n <tr>\n <th>2617639</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 26</td>\n <td> 1253309</td>\n <td> 59.333333</td>\n <td> 18.050000</td>\n </tr>\n <tr>\n <th>2617640</th>\n <td> se</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> 27</td>\n <td> NaN</td>\n <td> 56.133333</td>\n <td> 13.416667</td>\n </tr>\n <tr>\n <th>2957359</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> ME</td>\n <td> NaN</td>\n <td> 47.042222</td>\n <td>-68.140000</td>\n </tr>\n <tr>\n <th>2972262</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> MN</td>\n <td> NaN</td>\n <td> 45.036389</td>\n <td>-94.220833</td>\n </tr>\n <tr>\n <th>2985302</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> NJ</td>\n <td> NaN</td>\n <td> 41.089444</td>\n <td>-74.517500</td>\n </tr>\n <tr>\n <th>3020588</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> SD</td>\n <td> NaN</td>\n <td> 45.099722</td>\n <td>-96.801667</td>\n </tr>\n <tr>\n <th>3048843</th>\n <td> us</td>\n <td> stockholm</td>\n <td> Stockholm</td>\n <td> WI</td>\n <td> NaN</td>\n <td> 44.483333</td>\n <td>-92.261944</td>\n </tr>\n </tbody>\n</table>\n</div>",
"output_type": "pyout",
"prompt_number": 14,
"text": " Country City AccentCity Region Population Latitude Longitude\n2617638 se stockholm Stockholm 07 NaN 61.883056 13.750000\n2617639 se stockholm Stockholm 26 1253309 59.333333 18.050000\n2617640 se stockholm Stockholm 27 NaN 56.133333 13.416667\n2957359 us stockholm Stockholm ME NaN 47.042222 -68.140000\n2972262 us stockholm Stockholm MN NaN 45.036389 -94.220833\n2985302 us stockholm Stockholm NJ NaN 41.089444 -74.517500\n3020588 us stockholm Stockholm SD NaN 45.099722 -96.801667\n3048843 us stockholm Stockholm WI NaN 44.483333 -92.261944"
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Row label of Stockholm, Sweden: the only 'Stockholm' row that has a\n# population value in the filtered table.\nsthlm = 2617639\n# `.loc` is the explicit label-based indexer; `.ix` is deprecated and was\n# removed in pandas 1.0.\ndata.loc[sthlm]",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 15,
"text": "Country se\nCity stockholm\nAccentCity Stockholm\nRegion 26\nPopulation 1253309\nLatitude 59.33333\nLongitude 18.05\nName: 2617639, dtype: object"
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": "",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment