Skip to content

Instantly share code, notes, and snippets.

@mryap
Last active November 9, 2015 11:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mryap/b7cc27f96be1d688652f to your computer and use it in GitHub Desktop.
Save mryap/b7cc27f96be1d688652f to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path = 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "IOError",
"evalue": "[Errno 2] No such file or directory: 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-2-bcaecf00da5d>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'ch02/usagov_bitly_data2012-03-16-1331923249.txt'"
]
}
],
"source": [
"open(path).readline()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "IOError",
"evalue": "[Errno 2] No such file or directory: 'usagov_bitly_data2012-03-16-1331923249.txt'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mIOError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-3-afd5e6727c5e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mpath\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'usagov_bitly_data2012-03-16-1331923249.txt'\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'usagov_bitly_data2012-03-16-1331923249.txt'"
]
}
],
"source": [
"path = 'usagov_bitly_data2012-03-16-1331923249.txt'\n",
"open(path).readline()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'{ \"a\": \"Mozilla\\\\/5.0 (Windows NT 6.1; WOW64) AppleWebKit\\\\/535.11 (KHTML, like Gecko) Chrome\\\\/17.0.963.78 Safari\\\\/535.11\", \"c\": \"US\", \"nk\": 1, \"tz\": \"America\\\\/New_York\", \"gr\": \"MA\", \"g\": \"A6qOVH\", \"h\": \"wfLQtf\", \"l\": \"orofrog\", \"al\": \"en-US,en;q=0.8\", \"hh\": \"1.usa.gov\", \"r\": \"http:\\\\/\\\\/www.facebook.com\\\\/l\\\\/7AQEFzjSi\\\\/1.usa.gov\\\\/wfLQtf\", \"u\": \"http:\\\\/\\\\/www.ncbi.nlm.nih.gov\\\\/pubmed\\\\/22415991\", \"t\": 1331923247, \"hc\": 1331822918, \"cy\": \"Danvers\", \"ll\": [ 42.576698, -70.954903 ] }\\n'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"path = 'usagov_bitly_data2012-03-16-1331923249.txt'\n",
"open(path).readline()\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'records' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-43e8cf08c977>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mrecords\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined"
]
}
],
"source": [
"records[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-6-13ea3eeed5d8>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-6-13ea3eeed5d8>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m time_zones = [rec['tz']]for rec in records\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"time_zones = [rec['tz']]for rec in records\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'reconds' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-7-41aacc0f7dcf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mreconds\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'reconds' is not defined"
]
}
],
"source": [
"time_zones = [rec['tz']for rec in reconds]\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'records' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-8-db4fbd348da9>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrecords\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined"
]
}
],
"source": [
"time_zones = [rec['tz'] for rec in records]\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'records' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-9-32f5fd54a771>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtime_zones\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mrec\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tz'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mrec\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrecords\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;34m'tz'\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrec\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined"
]
}
],
"source": [
"time_zones = [rec['tz'] for rec in records if 'tz' in rec]\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'pat' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-10-53ddead9f562>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mjson\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mpat\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'pat' is not defined"
]
}
],
"source": [
"import json\n",
"pat"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"path ='usagov_bitly_data2012-03-16-1331923249.txt'\n",
"records = [json.loads(line)for line in open(path, 'rb')]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{u'a': u'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.78 Safari/535.11',\n",
" u'al': u'en-US,en;q=0.8',\n",
" u'c': u'US',\n",
" u'cy': u'Danvers',\n",
" u'g': u'A6qOVH',\n",
" u'gr': u'MA',\n",
" u'h': u'wfLQtf',\n",
" u'hc': 1331822918,\n",
" u'hh': u'1.usa.gov',\n",
" u'l': u'orofrog',\n",
" u'll': [42.576698, -70.954903],\n",
" u'nk': 1,\n",
" u'r': u'http://www.facebook.com/l/7AQEFzjSi/1.usa.gov/wfLQtf',\n",
" u't': 1331923247,\n",
" u'tz': u'America/New_York',\n",
" u'u': u'http://www.ncbi.nlm.nih.gov/pubmed/22415991'}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"records[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"records[0]['tz']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"u'Danvers'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"records[0]['cy']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ti"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"time_zones = [rec['tz'] for rec in records if 'tz' in rec]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[u'America/New_York',\n",
" u'America/Denver',\n",
" u'America/New_York',\n",
" u'America/Sao_Paulo',\n",
" u'America/New_York',\n",
" u'America/New_York',\n",
" u'Europe/Warsaw',\n",
" u'',\n",
" u'',\n",
" u'']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"time_zones[:10]"
]
},
{
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": [
"time_zones[:20]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-1-bd5c9d1cf95c>, line 1)",
"output_type": "error",
"traceback": [
"\u001b[1;36m File \u001b[1;32m\"<ipython-input-1-bd5c9d1cf95c>\"\u001b[1;36m, line \u001b[1;32m1\u001b[0m\n\u001b[1;33m from pandas impor DataFrame, Series\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"from pandas impor DataFrame, Series\n",
"import pandas as pd\n",
"frame = DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'records' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-2-be125da02e16>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mSeries\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mframe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecords\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'records' is not defined"
]
}
],
"source": [
"from pandas import DataFrame, Series\n",
"import pandas as pd\n",
"frame = DataFrame(records)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'frame' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-3-366ced492cb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'frame' is not defined"
]
}
],
"source": [
"frame.info()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"counts = get_counts(time_zones)\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'get_counts' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-4-fe16d8a7f9f5>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcounts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtime_zones\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'get_counts' is not defined"
]
}
],
"source": [
"counts = get_counts(time_zones)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'frame' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-5-366ced492cb8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;31mNameError\u001b[0m: name 'frame' is not defined"
]
}
],
"source": [
"frame.info()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment