Skip to content

Instantly share code, notes, and snippets.

@catherinedevlin
Created February 13, 2018 19:29
Show Gist options
  • Save catherinedevlin/2ba748e3d2e0187974651e9224518a08 to your computer and use it in GitHub Desktop.
Save catherinedevlin/2ba748e3d2e0187974651e9224518a08 to your computer and use it in GitHub Desktop.
Data crunching demo
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Put this in Binder!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = [1, 2, 3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# First steps\n",
"\n",
"- Get pip and make sure it runs on your machine\n",
"\n",
"`pip install jupyter`\n",
"\n",
"pip installs Python libraries\n",
"\n",
"[Automate the Boring Stuff](https://automatetheboringstuff.com/)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip (rather than !pip) runs pip for the environment the running kernel uses\n",
"%pip install requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Requests](http://docs.python-requests.org/en/master/): the best Python library for downloading stuff from the web"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = 'https://www.census.gov/econ/cfs/2012/cfs_2012_pumf_csv.zip'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response = requests.get(url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response.ok"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response.elapsed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"response.headers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import zipfile"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Stop here on an HTTP error status instead of saving an error page under a .zip name\n",
"response.raise_for_status()\n",
"# 'wb' because response.content is the raw bytes of the download\n",
"with open('transport_data.zip', 'wb') as outfile:\n",
"    outfile.write(response.content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extract every member of the archive into the current directory.\n",
"# zipfile needs no external unzip program and overwrites existing files without prompting,\n",
"# so this cell can be re-run safely.\n",
"with zipfile.ZipFile('transport_data.zip') as archive:\n",
"    archive.extractall()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!head cfs_2012_pumf_csv.txt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# newline='' leaves line-ending handling to the csv module, as its documentation asks.\n",
"# The file is deliberately left open: the next cell reads from `reader`.\n",
"infile = open('cfs_2012_pumf_csv.txt', newline='')\n",
"reader = csv.reader(infile)  # each row comes back as a list of strings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in reader:\n",
" print(line)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reopen the file so reading starts again from the first line.\n",
"# newline='' leaves line-ending handling to the csv module, as its documentation asks.\n",
"infile = open('cfs_2012_pumf_csv.txt', newline='')\n",
"reader = csv.DictReader(infile)  # each row comes back as a dict keyed by the header row"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for line in reader:\n",
" print(line)\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dict(line)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print the HAZMAT value of the rows at the start of the file.\n",
"n_printed = 0\n",
"# The with-block closes the file as soon as the loop is left\n",
"with open('cfs_2012_pumf_csv.txt', newline='') as infile:\n",
"    reader = csv.DictReader(infile)\n",
"    for line in reader:\n",
"        print(line['HAZMAT'])\n",
"        n_printed += 1\n",
"        if n_printed > 1000:  # stop once more than 1000 values have been printed\n",
"            break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add up SHIPMT_WGHT over every row whose HAZMAT value is 'P' or 'H'.\n",
"total_weight = 0\n",
"# The with-block closes the file when the loop finishes; newline='' is what the csv module asks for\n",
"with open('cfs_2012_pumf_csv.txt', newline='') as infile:  # open up the file\n",
"    reader = csv.DictReader(infile)  # set up a DictReader that converts raw lines into Python dictionaries\n",
"    for line in reader:  # do the following commands once for every row in the file\n",
"        if line['HAZMAT'] in ('P', 'H'):\n",
"            print(line['SHIPMT_WGHT'])\n",
"            weight = int(line['SHIPMT_WGHT'])  # convert string to integer\n",
"            total_weight += weight\n",
"print('Total weight: ' + str(total_weight))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"total_weight"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment