catherinedevlin/demonstrate_data_crunching.ipynb

## demonstrate_data_crunching.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Put this in Binder!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = [1, 2, 3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# First steps\n",
    "\n",
    "- get pip, make sure it runs on your machine\n",
    "\n",
    "`pip install jupyter`\n",
    "\n",
    "pip installs python libraries\n",
    "\n",
    "[Automate the Boring Stuff](https://automatetheboringstuff.com/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[Requests](http://docs.python-requests.org/en/master/): the best Python library for downloading stuff from the web"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = 'https://www.census.gov/econ/cfs/2012/cfs_2012_pumf_csv.zip'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response = requests.get(url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response.ok"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response.elapsed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response.headers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stop with a clear error if the download failed, instead of saving an error page as a .zip\n",
    "response.raise_for_status()\n",
    "# 'wb' = write binary: response.content is raw bytes, not text\n",
    "with open('transport_data.zip', 'wb') as outfile:\n",
    "    outfile.write(response.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract with the zipfile module imported earlier: it needs no external unzip\n",
    "# program and overwrites existing files without prompting, so the cell can be re-run\n",
    "with zipfile.ZipFile('transport_data.zip') as archive:\n",
    "    archive.extractall()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head cfs_2012_pumf_csv.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "infile = open('cfs_2012_pumf_csv.txt')\n",
    "reader = csv.reader(infile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for line in reader:\n",
    "    print(line)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "infile = open('cfs_2012_pumf_csv.txt')\n",
    "reader = csv.DictReader(infile)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for line in reader:\n",
    "    print(line)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dict(line)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the HAZMAT column of the first 1000 rows to see which codes appear\n",
    "with open('cfs_2012_pumf_csv.txt', newline='') as infile:   # 'with' closes the file when the block ends\n",
    "    reader = csv.DictReader(infile)\n",
    "    for n_printed, line in enumerate(reader):   # n_printed counts the rows already shown\n",
    "        if n_printed >= 1000:\n",
    "            break\n",
    "        print(line['HAZMAT'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add up SHIPMT_WGHT over every row whose HAZMAT code is 'P' or 'H'\n",
    "total_weight = 0\n",
    "with open('cfs_2012_pumf_csv.txt', newline='') as infile:   # open up the file; 'with' closes it when done\n",
    "    reader = csv.DictReader(infile)   # set up a DictReader that converts raw lines into Python dictionaries\n",
    "    for line in reader:   # do the following commands once for every line in the file\n",
    "        if line['HAZMAT'] in ('P', 'H'):\n",
    "            weight = int(line['SHIPMT_WGHT'])   # convert string to integer\n",
    "            total_weight = total_weight + weight\n",
    "print('Total weight: ' + str(total_weight))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "total_weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Put this in Binder!"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"x = [1, 2, 3]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"print(x)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# First steps\n",
	"\n",
	"- get pip, make sure it runs on your machine\n",
	"\n",
	"`pip install jupyter`\n",
	"\n",
	"pip installs python libraries\n",
	"\n",
	"[Automate the Boring Stuff](https://automatetheboringstuff.com/)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"!pip install requests"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import requests"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"[Requests](http://docs.python-requests.org/en/master/): the best Python library for downloading stuff from the web"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"url = 'https://www.census.gov/econ/cfs/2012/cfs_2012_pumf_csv.zip'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"response = requests.get(url)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"response"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"response.ok"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"response.elapsed"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"response.headers"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import zipfile"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"with open('transport_data.zip', 'wb') as outfile:\n",
	" outfile.write(response.content)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"!ls"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"!unzip transport_data.zip"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"!ls"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"!head cfs_2012_pumf_csv.txt"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import csv"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"infile = open('cfs_2012_pumf_csv.txt')\n",
	"reader = csv.reader(infile)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"for line in reader:\n",
	" print(line)\n",
	" break"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"infile = open('cfs_2012_pumf_csv.txt')\n",
	"reader = csv.DictReader(infile)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"for line in reader:\n",
	" print(line)\n",
	" break"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"dict(line)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Peek at the HAZMAT column of the first 1000 rows to see which codes appear\n",
	"with open('cfs_2012_pumf_csv.txt', newline='') as infile:   # 'with' closes the file when the block ends\n",
	"    reader = csv.DictReader(infile)\n",
	"    for n_printed, line in enumerate(reader):   # n_printed counts the rows already shown\n",
	"        if n_printed >= 1000:\n",
	"            break\n",
	"        print(line['HAZMAT'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Add up SHIPMT_WGHT over every row whose HAZMAT code is 'P' or 'H'\n",
	"total_weight = 0\n",
	"with open('cfs_2012_pumf_csv.txt', newline='') as infile:   # open up the file; 'with' closes it when done\n",
	"    reader = csv.DictReader(infile)   # set up a DictReader that converts raw lines into Python dictionaries\n",
	"    for line in reader:   # do the following commands once for every line in the file\n",
	"        if line['HAZMAT'] in ('P', 'H'):\n",
	"            weight = int(line['SHIPMT_WGHT'])   # convert string to integer\n",
	"            total_weight = total_weight + weight\n",
	"print('Total weight: ' + str(total_weight))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"total_weight"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}