Skip to content

Instantly share code, notes, and snippets.

@simonw
Created October 24, 2019 04:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonw/bbde42c1e42d13107a17a050c3b5993f to your computer and use it in GitHub Desktop.
Save simonw/bbde42c1e42d13107a17a050c3b5993f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Importing Louisiana bills for a Big Local project\n",
"\n",
"We have a collection of zip files containing \"bills\" - where each bill is described using JSON.\n",
"\n",
"I'm going to import them into a SQLite database using https://sqlite-utils.readthedocs.io/ so they can be explored using Datasette https://datasette.readthedocs.io/\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sqlite_utils"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It looks to me like the interesting stuff is in the bills JSON files inside the zips.\n",
"\n",
"e.g. `/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA 2/2019-2019_Regular_Session/bill/HB4.json`"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"bill = json.load(open(\"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA 2/2019-2019_Regular_Session/bill/HB4.json\"))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['bill'])"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bill.keys()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'bill_id': 1187160,\n",
" 'change_hash': 'a3f129234f61f5f54285aef24149bed2',\n",
" 'session_id': 1634,\n",
" 'session': {'session_id': 1634,\n",
" 'session_name': '2019 Regular Session',\n",
" 'session_title': 'Regular Session',\n",
" 'year_start': 2019,\n",
" 'year_end': 2019,\n",
" 'special': 0},\n",
" 'url': 'https://legiscan.com/LA/bill/HB4/2019',\n",
" 'state_link': 'http://www.legis.la.gov/legis/BillInfo.aspx?s=19rs&b=HB4&sbi=y',\n",
" 'completed': 1,\n",
" 'status': 4,\n",
" 'status_date': '2019-06-11',\n",
" 'progress': [{'date': '2019-01-28', 'event': 1},\n",
" {'date': '2019-01-28', 'event': 9},\n",
" {'date': '2019-04-08', 'event': 9},\n",
" {'date': '2019-05-08', 'event': 10},\n",
" {'date': '2019-05-21', 'event': 2},\n",
" {'date': '2019-05-22', 'event': 9},\n",
" {'date': '2019-05-28', 'event': 10},\n",
" {'date': '2019-06-03', 'event': 3},\n",
" {'date': '2019-06-11', 'event': 8},\n",
" {'date': '2019-06-11', 'event': 4}],\n",
" 'state': 'LA',\n",
" 'state_id': 18,\n",
" 'bill_number': 'HB4',\n",
" 'bill_type': 'B',\n",
" 'bill_type_id': '1',\n",
" 'body': 'H',\n",
" 'body_id': 45,\n",
" 'current_body': 'H',\n",
" 'current_body_id': 45,\n",
" 'title': \"Provides relative to the procedures for sheriff's sales of property\",\n",
" 'description': \"Provides relative to the procedures for sheriff's sales of property\",\n",
" 'committee': [],\n",
" 'pending_committee_id': 0,\n",
" 'history': [{'date': '2019-01-28',\n",
" 'action': 'Prefiled.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1},\n",
" {'date': '2019-01-28',\n",
" 'action': 'Under the rules, provisionally referred to the Committee on Judiciary.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1},\n",
" {'date': '2019-02-01',\n",
" 'action': 'First appeared in the Interim Calendar on 2/1/2019.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-04-08',\n",
" 'action': 'Read by title, under the rules, referred to the Committee on Judiciary.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1},\n",
" {'date': '2019-05-08',\n",
" 'action': 'Reported with amendments (13-0).',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1},\n",
" {'date': '2019-05-09',\n",
" 'action': 'Read by title, amended, ordered engrossed, passed to 3rd reading.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-05-09',\n",
" 'action': 'Scheduled for floor debate on 05/21/19.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-05-21',\n",
" 'action': 'Read third time by title, roll called on final passage, yeas 96, nays 0. Finally passed, title adopted, ordered to the Senate.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-05-21',\n",
" 'action': 'Received in the Senate. Rules suspended. Read first time by title and placed on the Calendar for a second reading.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 1},\n",
" {'date': '2019-05-22',\n",
" 'action': 'Read second time by title and referred to the Committee on Judiciary B.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 1},\n",
" {'date': '2019-05-28',\n",
" 'action': 'Rules suspended. Reported favorably.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 1},\n",
" {'date': '2019-05-29',\n",
" 'action': 'Read by title and referred to the Legislative Bureau.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 0},\n",
" {'date': '2019-05-30',\n",
" 'action': 'Reported without Legislative Bureau amendments. Read by title and passed to third reading and final passage.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 0},\n",
" {'date': '2019-06-02',\n",
" 'action': 'Rules suspended. Read by title, passed by a vote of 35 yeas and 1 nays, and ordered returned to the House. Motion to reconsider tabled.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 0},\n",
" {'date': '2019-06-02',\n",
" 'action': 'Received from the Senate without amendments.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-06-03',\n",
" 'action': 'Enrolled and signed by the Speaker of the House.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1},\n",
" {'date': '2019-06-04',\n",
" 'action': 'Signed by the President of the Senate.',\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'importance': 0},\n",
" {'date': '2019-06-05',\n",
" 'action': 'Sent to the Governor for executive approval.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 0},\n",
" {'date': '2019-06-11',\n",
" 'action': 'Signed by the Governor. Becomes Act No. 415.',\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'importance': 1}],\n",
" 'sponsors': [{'people_id': 17824,\n",
" 'person_hash': '0cys0f51',\n",
" 'party_id': 2,\n",
" 'party': 'R',\n",
" 'role_id': 1,\n",
" 'role': 'Rep',\n",
" 'name': 'Stephen Dwight',\n",
" 'first_name': 'Stephen',\n",
" 'middle_name': 'C.',\n",
" 'last_name': 'Dwight',\n",
" 'suffix': '',\n",
" 'nickname': '',\n",
" 'district': 'HD-035',\n",
" 'ftm_eid': 32373968,\n",
" 'votesmart_id': 165523,\n",
" 'opensecrets_id': '',\n",
" 'ballotpedia': 'Stephen_Dwight',\n",
" 'sponsor_type_id': 1,\n",
" 'sponsor_order': 1,\n",
" 'committee_sponsor': 0,\n",
" 'committee_id': '0'}],\n",
" 'sasts': [],\n",
" 'subjects': [{'subject_id': 3827, 'subject_name': 'PROPERTY/IMMOVABLE'}],\n",
" 'texts': [{'doc_id': 1888068,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Introduced',\n",
" 'type_id': 1,\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/text/HB4/id/1888068',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1111410',\n",
" 'text_size': 38480},\n",
" {'doc_id': 2012585,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Engrossed',\n",
" 'type_id': 4,\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/text/HB4/id/2012585',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1132572',\n",
" 'text_size': 40755},\n",
" {'doc_id': 2031989,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Enrolled',\n",
" 'type_id': 5,\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/text/HB4/id/2031989',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1141455',\n",
" 'text_size': 36084},\n",
" {'doc_id': 2042583,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Chaptered',\n",
" 'type_id': 6,\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/text/HB4/id/2042583',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1144887',\n",
" 'text_size': 42881}],\n",
" 'votes': [{'roll_call_id': 868132,\n",
" 'date': '2019-05-21',\n",
" 'desc': 'House Vote on HB 4 FINAL PASSAGE (#451)',\n",
" 'yea': 96,\n",
" 'nay': 0,\n",
" 'nv': 0,\n",
" 'absent': 9,\n",
" 'total': 105,\n",
" 'passed': 1,\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'url': 'https://legiscan.com/LA/rollcall/HB4/id/868132',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1136368'},\n",
" {'roll_call_id': 876289,\n",
" 'date': '2019-06-02',\n",
" 'desc': 'Senate Vote on HB 4 FINAL PASSAGE (#772)',\n",
" 'yea': 35,\n",
" 'nay': 1,\n",
" 'nv': 0,\n",
" 'absent': 3,\n",
" 'total': 39,\n",
" 'passed': 1,\n",
" 'chamber': 'S',\n",
" 'chamber_id': 46,\n",
" 'url': 'https://legiscan.com/LA/rollcall/HB4/id/876289',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1141000'}],\n",
" 'amendments': [{'amendment_id': 73639,\n",
" 'adopted': 0,\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'date': '0000-00-00',\n",
" 'title': 'House Committee Amendment #1017 JUD Draft',\n",
" 'description': 'House Committee Amendment #1017 JUD Draft',\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/amendment/HB4/id/73639',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1123678'},\n",
" {'amendment_id': 75936,\n",
" 'adopted': 0,\n",
" 'chamber': 'H',\n",
" 'chamber_id': 45,\n",
" 'date': '0000-00-00',\n",
" 'title': 'House Committee Amendment #2398 JUD Proposed',\n",
" 'description': 'House Committee Amendment #2398 JUD Proposed',\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/amendment/HB4/id/75936',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1132459'}],\n",
" 'supplements': [{'supplement_id': 94072,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Analysis',\n",
" 'type_id': 2,\n",
" 'title': 'Analysis',\n",
" 'description': 'Digest of HB4 Original',\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/supplement/HB4/id/94072',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1111412'},\n",
" {'supplement_id': 109022,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Analysis',\n",
" 'type_id': 2,\n",
" 'title': 'Analysis',\n",
" 'description': 'Digest of HB4 Engrossed',\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/supplement/HB4/id/109022',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1132897'},\n",
" {'supplement_id': 110800,\n",
" 'date': '0000-00-00',\n",
" 'type': 'Analysis',\n",
" 'type_id': 2,\n",
" 'title': 'Analysis',\n",
" 'description': 'Resume Digest for HB4',\n",
" 'mime': 'application/pdf',\n",
" 'mime_id': 2,\n",
" 'url': 'https://legiscan.com/LA/supplement/HB4/id/110800',\n",
" 'state_link': 'http://www.legis.la.gov/Legis/ViewDocument.aspx?d=1145650'}],\n",
" 'calendar': [{'type_id': 1,\n",
" 'type': 'Hearing',\n",
" 'date': '2019-05-08',\n",
" 'time': '09:30',\n",
" 'location': 'Room 1',\n",
" 'description': 'House Committee on Judiciary'},\n",
" {'type_id': 1,\n",
" 'type': 'Hearing',\n",
" 'date': '2019-05-28',\n",
" 'time': '09:00',\n",
" 'location': 'Room E',\n",
" 'description': 'Senate Committee on Judiciary B'}]}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bill['bill']"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"def save_bill(db, bill):\n",
" bill = dict(bill)\n",
" session = bill.pop(\"session\")\n",
" session[\"name\"] = session.pop(\"session_name\")\n",
" bill_id = bill[\"bill_id\"]\n",
" db[\"sessions\"].upsert(session, pk=\"session_id\")\n",
" \n",
" # Does it have a committee?\n",
" committee = bill.pop(\"committee\", None)\n",
" if committee:\n",
" db[\"committees\"].upsert(committee, pk=\"committee_id\")\n",
" bill[\"committee\"] = committee[\"committee_id\"]\n",
" \n",
" # Bill types\n",
" bill_type = {\n",
" \"bill_type_id\": bill[\"bill_type_id\"],\n",
" \"name\": bill.pop(\"bill_type\"),\n",
" }\n",
" db[\"bill_types\"].upsert(bill_type, pk=\"bill_type_id\")\n",
" # history, sponser etc items will be handled separately\n",
" history = bill.pop(\"history\", None) or []\n",
" sponsors = bill.pop(\"sponsors\", None) or []\n",
" progress = bill.pop(\"progress\", None) or []\n",
" subjects = bill.pop(\"subjects\", None) or []\n",
" texts = bill.pop(\"texts\", None) or []\n",
" votes = bill.pop(\"votes\", None) or []\n",
" amendments = bill.pop(\"amendments\", None) or []\n",
" supplements = bill.pop(\"supplements\", None) or []\n",
" calendar = bill.pop(\"calendar\", None) or []\n",
" sasts = bill.pop(\"sasts\", None) or []\n",
" \n",
" db[\"bills\"].upsert(bill, pk=\"bill_id\", foreign_keys=(\"session_id\", \"bill_type_id\", \"committee\"), column_order=(\"bill_id\", \"bill_number\", \"session_id\", \"title\", \"description\", \"url\"))\n",
" \n",
" # Now save all the stuff we stashed earlier\n",
" for item in history:\n",
" item[\"bill_id\"] = bill_id\n",
" item[\"id\"] = id_for_history(item)\n",
" db[\"history\"].upsert_all(history, pk=\"id\", foreign_keys=(\"bill_id\",))\n",
" \n",
" for item in sponsors:\n",
" db[\"people\"].upsert(item, pk=\"people_id\")\n",
" db[\"sponsors\"].upsert({\n",
" \"bill_id\": bill_id,\n",
" \"people_id\": item[\"people_id\"],\n",
" },\n",
" pk=(\"bill_id\", \"people_id\"),\n",
" foreign_keys=((\"bill_id\", \"bills\", \"bill_id\"), (\"people_id\", \"people\", \"people_id\")),\n",
" )\n",
"\n",
" for subject in subjects:\n",
" db[\"subjects\"].upsert({\n",
" \"subject_id\": subject[\"subject_id\"],\n",
" \"name\": subject[\"subject_name\"],\n",
" }, pk=\"subject_id\")\n",
" db[\"bill_subjects\"].upsert({\n",
" \"bill_id\": bill_id,\n",
" \"subject_id\": subject[\"subject_id\"],\n",
" }, pk=(\"bill_id\", \"subject_id\"), foreign_keys=(\"bill_id\", \"subject_id\"))\n",
"\n",
" for item in texts:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"texts\"].upsert_all(texts, pk=\"doc_id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in votes:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"votes\"].upsert_all(votes, pk=\"roll_call_id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in amendments:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"amendments\"].upsert_all(amendments, pk=\"amendment_id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in supplements:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"supplements\"].upsert_all(supplements, pk=\"supplement_id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in calendar:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"calendars\"].upsert_all(calendar, hash_id=\"id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in progress:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"progress\"].upsert_all(progress, hash_id=\"id\", foreign_keys=(\"bill_id\",))\n",
"\n",
" for item in sasts:\n",
" item[\"bill_id\"] = bill_id\n",
" db[\"sasts\"].upsert_all(sasts, hash_id=\"id\", foreign_keys=(\"bill_id\",))"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import hashlib"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"def id_for_history(history):\n",
" return history[\"date\"] + '-' + hashlib.sha1(\n",
" json.dumps(history, separators=(\",\", \":\"), sort_keys=True).encode(\"utf8\")\n",
" ).hexdigest()\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Now do it for ALL of the zip files"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2017-2017_1st_Special_Session_JSON_20170303_1c6c379a0812d16cc6e37c58edd49b07.zip'"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"path = glob.glob(\"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/*.zip\")[0]\n",
"path"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"import zipfile\n",
"from tqdm import tqdm_notebook"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2017-2017_1st_Special_Session_JSON_20170303_1c6c379a0812d16cc6e37c58edd49b07.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "844d63d16f59497c8ff0a3b6838bdee0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=63), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2016-2016_1st_Special_Session_JSON_20160323_54f6629215b7154f6016bbc6916d07c8.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "25817160ff5d4374a7bb3fbbfd0ec9cb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=208), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2010-2010_Regular_Session_JSON_20100709_b48471018582304f8fea4310bca02252.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "65b8444d8a074beabd499e2ea144d0f3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=3150), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2018-2018_1st_Special_Session_JSON_20180326_0f61f2b1744734fc449aedac4e63ba8b.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8cc8648c5cf34a72b39676e0e348db32",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=71), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2016-2016_Regular_Session_JSON_20160622_44a138d299546438ecdd49ac30f8b404.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ff9fa6fbe50d4480a843ab5739510113",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=2404), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2014-2014_Regular_Session_JSON_20140623_75d27dc7b6cb8d01313debe183d9a404.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3bdc8d6fce20426c9873e435f9428f90",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=2823), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2018-2018_Regular_Session_JSON_20180607_be428bc5dc290eadb68ccfe4da9ee16e.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39f3eb471aaf4d98b41f9fa947180e54",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=2216), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2017-2017_2nd_Special_Session_JSON_20170629_ff2878ba3ef09842473fe77c9489d54e.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "968b351e428847bf9a67ed94e9c064da",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=37), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2011-2011_1st_Special_Session_JSON_20110415_5977d4e451f6d1f267cf996a85dcab8f.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fefe0b5195d540ae9e6f454d64494450",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=179), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2015-2015_Regular_Session_JSON_20150701_41b9a0c9e02faf3331fe9b27474e768b.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0a44692a615445a49a9e666cb8311e6c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1974), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2012-2012_Regular_Session_JSON_20120615_86b9c3de62acdf989e847ffd88f9bf17.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3cdf1358d6944d8e89a9602295cff177",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=2734), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2013-2013_Regular_Session_JSON_20130626_cc2bcba14d9205cddc803b1bf0f6b8ba.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "18818d7ab99349d49bb6a775c40e436e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1763), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2017-2017_Regular_Session_JSON_20170630_625263f55b207673251bfd0fa17c8b11.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "06b6e13e98084183921631ec01f2f15c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1670), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2018-2018_2nd_Special_Session_JSON_20180618_bb419aa09bc14b6b8ff5968196a24b35.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b69e2bae574349c2a28018de713cf5e7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=172), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2019-2019_Regular_Session_JSON_20190730_4edbf0b284d5bf72c6da92b76c626573.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7b1b7e14e2bd40779f8478d5510ff2de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1735), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2018-2018_3rd_Special_Session_JSON_20180627_700e121dfd5a2320844ba34f0cb998ca.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "47426fbc35a24b86b986c319ab3c92ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=79), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2016-2016_2nd_Special_Session_JSON_20160711_e04cf88c7b13e09c4594eb02af5b8249.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fb5d2ead2b4c4a5cb1ad16be8644142b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=169), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/LA_2011-2011_Regular_Session_JSON_20110713_27c4c635ef1d569710775409f8a7a04b.zip\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2e02e650476c4057adb8dd9ec6240c73",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=1583), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"db = sqlite_utils.Database(\"/tmp/la_bills.db\")\n",
"\n",
"for zippath in glob.glob(\"/Users/simonw/Dropbox/Development/big-local-2019/drive-download-20191023T183757Z-001/*.zip\"):\n",
" zf = zipfile.ZipFile(zippath)\n",
" print(zippath)\n",
" filenames = [f.filename for f in zf.filelist]\n",
" bills = [f for f in filenames if \"/bill/\" in f and f.endswith(\".json\")]\n",
" for bill in tqdm_notebook(bills):\n",
" bill = json.loads(zf.open(bill).read())['bill']\n",
" save_bill(db, bill)\n"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<Table bills (bill_id, bill_number, session_id, title, description, url, change_hash, state_link, completed, status, status_date, state, state_id, bill_type_id, body, body_id, current_body, current_body_id, pending_committee_id, committee)>"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Make it so you can search against the title and decsription fields:\n",
"db[\"bills\"].enable_fts([\"title\", \"description\"], create_triggers=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment