Last active
November 9, 2015 11:37
-
-
Save mryap/b7cc27f96be1d688652f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "IOError", | |
"evalue": "[Errno 2] No such file or directory: 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-2-bcaecf00da5d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'" | |
] | |
} | |
], | |
"source": [ | |
"open(path).readline()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "IOError", | |
"evalue": "[Errno 2] No such file or directory: 'usagov_bitly_data2012-03-16-1331923249.txt'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-3-afd5e6727c5e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'usagov_bitly_data2012-03-16-1331923249.txt'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'usagov_bitly_data2012-03-16-1331923249.txt'" | |
] | |
} | |
], | |
"source": [ | |
"path = 'usagov_bitly_data2012-03-16-1331923249.txt'\n", | |
"open(path).readline()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'{ \"a\": \"Mozilla\\\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\\\/535.11 (KHTML, like Gecko) Chrome\\\\/17.0.963.78 Safari\\\\/535.11\", \"c\": \"US\", \"nk\": 1, \"tz\": \"America\\\\/New_York\", \"gr\": \"MA\", \"g\": \"A6qOVH\", \"h\": \"wfLQtf\", \"l\": \"orofrog\", \"al\": \"en-US,en;q=0.8\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\\\/\\\\/www.facebook.com\\\\/l\\\\/7AQEFzjSi\\\\/1.usa.gov\\\\/wfLQtf\", \"u\": \"http:\\\\/\\\\/www.ncbi.nlm.nih.gov\\\\/pubmed\\\\/22415991\", \"t\": 1331923247, \"hc\": 1331822918, \"cy\": \"Danvers\", \"ll\": [ 42.576698, -70.954903 ] }\\n'" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"path = 'usagov_bitly_data2012-03-16-1331923249.txt'\n", | |
"open(path).readline()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'records' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-5-43e8cf08c977>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mrecords\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"records[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "SyntaxError", | |
"evalue": "invalid syntax (<ipython-input-6-13ea3eeed5d8>, line 1)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-6-13ea3eeed5d8>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m time_zones = [rec['tz']]for rec in records\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" | |
] | |
} | |
], | |
"source": [ | |
"time_zones = [rec['tz']]for rec in records\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'reconds' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-7-41aacc0f7dcf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mreconds\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'reconds' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"time_zones = [rec['tz']for rec in reconds]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'records' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-8-db4fbd348da9>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrecords\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"time_zones = [rec['tz'] for rec in records]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'records' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-9-32f5fd54a771>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrecords\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;34m'tz'\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrec\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"time_zones = [rec['tz'] for rec in records if 'tz' in rec]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'pat' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-10-53ddead9f562>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mjson\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mpat\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'pat' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"import json\n", | |
"pat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"path ='usagov_bitly_data2012-03-16-1331923249.txt'\n", | |
"records = [json.loads(line)for line in open(path, 'rb')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',\n", | |
" u'al': u'en-US,en;q=0.8',\n", | |
" u'c': u'US',\n", | |
" u'cy': u'Danvers',\n", | |
" u'g': u'A6qOVH',\n", | |
" u'gr': u'MA',\n", | |
" u'h': u'wfLQtf',\n", | |
" u'hc': 1331822918,\n", | |
" u'hh': u'1.usa.gov',\n", | |
" u'l': u'orofrog',\n", | |
" u'll': [42.576698, -70.954903],\n", | |
" u'nk': 1,\n", | |
" u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',\n", | |
" u't': 1331923247,\n", | |
" u'tz': u'America/New_York',\n", | |
" u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"records[0]['tz']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"u'Danvers'" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records[0]['cy']\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"ti" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"time_zones = [rec['tz'] for rec in records if 'tz' in rec]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[u'America/New_York',\n", | |
" u'America/Denver',\n", | |
" u'America/New_York',\n", | |
" u'America/Sao_Paulo',\n", | |
" u'America/New_York',\n", | |
" u'America/New_York',\n", | |
" u'Europe/Warsaw',\n", | |
" u'',\n", | |
" u'',\n", | |
" u'']" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"time_zones[:10]" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": { | |
"collapsed": false | |
}, | |
"source": [ | |
"time_zones[:20]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "SyntaxError", | |
"evalue": "invalid syntax (<ipython-input-1-bd5c9d1cf95c>, line 1)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-1-bd5c9d1cf95c>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m from pandas impor DataFrame, Series\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" | |
] | |
} | |
], | |
"source": [ | |
"from pandas impor DataFrame, Series\n", | |
"import pandas as pd\n", | |
"frame = DataFrame(records)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'records' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-2-be125da02e16>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"from pandas import DataFrame, Series\n", | |
"import pandas as pd\n", | |
"frame = DataFrame(records)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'frame' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-3-366ced492cb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'frame' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"frame.info()\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"counts = get_counts(time_zones)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'get_counts' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-4-fe16d8a7f9f5>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcounts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtime_zones\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'get_counts' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"counts = get_counts(time_zones)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'frame' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-5-366ced492cb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;31mNameError\u001b[0m: name 'frame' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"frame.info()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment