lukerosiak / CatoApprops.py
Last active January 2, 2016 21:19
Parse appropriations from Cato XML
import os
import json
import re
import csv
from bs4 import BeautifulSoup
#format numbers
def commafy(x):
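The preview truncates at the `commafy` definition. The `#format numbers` comment suggests a thousands-separator helper; the original may have used the imported `re` module, but a minimal modern equivalent could read (a sketch, not the gist's actual body):

```python
def commafy(x):
    """Format a number with thousands separators: 1234567 -> '1,234,567'."""
    return '{:,}'.format(int(x))
```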
lukerosiak / embedsenate2014
Last active December 19, 2015 06:19
Embed Washington Examiner 2014 Senate battleground graphic
PREFERRED EMBED CODE (widget):
<div id="2014senatetossupsmap_hype_container" style="position:relative;overflow:hidden;width:600px;height:500px;">
<script type="text/javascript" charset="utf-8" src="http://s3.amazonaws.com/examiner/2014battleground/2014+SENATE+tossups+map.hyperesources/2014senatetossupsmap_hype_generated_script.js?70501"></script>
</div>
NON-PREFERRED EMBED CODE IF THE WIDGET DOESN'T WORK (iframe):
<iframe src="http://s3.amazonaws.com/examiner/2014battleground/map.html" width="620" height="500" scrolling="no" frameborder="no"></iframe>
lukerosiak / mirror990s.py
Created June 6, 2013 19:52
Download text files representing OCR'd images of IRS Form 990s, corresponding to the URL scheme at bulk.resource.org/irs.gov/eo. Metadata for each file, such as name of nonprofit, IRS EIN, and year, is available in the "manifest" files there. A parser for those is available at github.com/lukerosiak/irs/.
import os
import boto
"""
Mirror the entire nonprofittext S3 bucket, downloading only files that aren't already present locally or whose S3 version is larger than the copy we have.
The only dependency is boto. To install: pip install boto
To run: python download.py
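The preview ends before the download loop. A sketch of what the mirroring might look like with the boto 2 API the gist depends on; `should_download` captures the size rule stated in the docstring, and the anonymous connection is an assumption based on the bucket being public:

```python
import os

def should_download(local_path, remote_size):
    """Fetch a key if the local copy is missing or the S3 copy is larger."""
    if not os.path.exists(local_path):
        return True
    return remote_size > os.path.getsize(local_path)

def mirror(bucket_name='nonprofittext', dest='.'):
    # boto 2.x; the bucket is public, so connect anonymously (assumption).
    from boto.s3.connection import S3Connection
    conn = S3Connection(anon=True)
    bucket = conn.get_bucket(bucket_name)
    for key in bucket.list():
        local_path = os.path.join(dest, key.name)
        if should_download(local_path, key.size):
            d = os.path.dirname(local_path)
            if d and not os.path.isdir(d):
                os.makedirs(d)
            key.get_contents_to_filename(local_path)
```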
lukerosiak / mlb.py
Last active April 2, 2018 00:39
Parse MLB transactions and injuries into a spreadsheet.
#This web site lists recent injuries for MLB players in HTML, but makes you click through each team:
#http://mlb.mlb.com/mlb/fantasy/injuries/
#To do real data analysis, we want all of it at once and don't have time to click everywhere.
#So our exercise is to get every available injury into one easy-to-use spreadsheet.
#By looking at "view source" on the web site, I found that it actually hits another URL, which provides the injuries, trades and other info in a computer-readable format called JSON, which maps
#almost directly onto Python's dictionary type. You can only get one month at a time because there are so many records. See it here:
#http://mlb.mlb.com/lookup/json/named.transaction_all.bam?start_date=20120301&end_date=20120401&sport_code='mlb'
#Our code will hit that URL repeatedly for different dates, convert the response into a Python object, and then write selected fields from that object to a CSV file.
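The month-by-month scrape described above might be sketched like this (modern Python 3; the URL is the one given in the comments, but the JSON field names written to the CSV are guesses for illustration, not taken from the gist):

```python
import csv
import json
import urllib.request
from calendar import monthrange

URL = ("http://mlb.mlb.com/lookup/json/named.transaction_all.bam"
       "?start_date=%s&end_date=%s&sport_code='mlb'")

def month_ranges(year):
    """Yield one (start, end) pair of YYYYMMDD strings per month."""
    for month in range(1, 13):
        last = monthrange(year, month)[1]
        yield '%d%02d01' % (year, month), '%d%02d%02d' % (year, month, last)

def scrape(year, outfile='transactions.csv'):
    with open(outfile, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['date', 'team', 'player', 'type', 'note'])
        for start, end in month_ranges(year):
            data = json.load(urllib.request.urlopen(URL % (start, end)))
            rows = data['transaction_all']['queryResults'].get('row', [])
            if isinstance(rows, dict):  # a lone result may come back unwrapped
                rows = [rows]
            for r in rows:  # field names below are assumptions
                writer.writerow([r.get('trans_date'), r.get('team'),
                                 r.get('player'), r.get('type'), r.get('note')])
```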
lukerosiak / gist:5328017
Created April 6, 2013 22:59
census.ire.org to PostgreSQL
#import all Census 2010 tables into PostgreSQL. Then use BoundaryService to import TIGER shapefiles into PostGIS and join them.
import os
s = """ire_H1.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
ire_H10.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
ire_H11.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
ire_H11A.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
ire_H11B.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
ire_H11C.sql a year ago create table DDL for bulkdata export format [JoeGermuska]
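The string above pastes in the IRE directory listing to get the DDL file names. A sketch of a loader that runs each `ire_*.sql` file through `psql` (assuming the `psql` client is on the PATH and can authenticate to the target database):

```python
import os
import subprocess

def is_ire_sql(name):
    """Match the IRE census DDL files listed above (ire_H1.sql, ...)."""
    return name.startswith('ire_') and name.endswith('.sql')

def load_all(directory, database='census'):
    # Run each DDL file against PostgreSQL via the psql command-line client.
    for name in sorted(filter(is_ire_sql, os.listdir(directory))):
        subprocess.check_call(['psql', '-d', database, '-f',
                               os.path.join(directory, name)])
```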
lukerosiak / OLMS
Created February 5, 2013 20:00
Import OLMS into Postgres
import os
import psycopg2
#IMPORT AND UNZIP ALL YEARS OF FILES INTO THIS DIRECTORY
#SET VARIABLES IN THE NEXT 3 LINES
path = '/media/sf_bulk/labor/data/'
years = [str(x) for x in range(2000,2013)]
conn = psycopg2.connect(database="labor", user="", password="")
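The preview stops after the connection is opened. One plausible shape for the per-year import is to bulk-load each file with `COPY` through psycopg2; the `table_for` naming rule below is an assumption for illustration, not taken from the gist:

```python
import os

def table_for(filename):
    """Derive a table name from a file name: 'ar_disbursements_2012.txt'
    -> 'ar_disbursements' (naming pattern assumed, not from the gist)."""
    base = os.path.splitext(os.path.basename(filename))[0]
    return '_'.join(p for p in base.split('_') if not p.isdigit())

def copy_file(conn, filename):
    """Bulk-load one delimited file into Postgres via COPY (psycopg2)."""
    with open(filename) as f:
        cur = conn.cursor()
        cur.copy_expert("COPY %s FROM STDIN WITH CSV HEADER"
                        % table_for(filename), f)
    conn.commit()
```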
lukerosiak / diff-different-lines-only.txt
Created October 7, 2011 14:38
Diff between old disbursement detail output and new
OFFICE OF THE MINORITY WHIP,2009Q4,PERSONNEL COMPENSATION,,"D
| OFFICE OF THE MINORITY WHIP,2009Q4,PERSONNEL COMPENSATION,,"D
CAO OPERATIONS MANAGEMENT,2009Q4,TRAVEL,12-03,DARRYL A ATCHIS
| CAO OPERATIONS MANAGEMENT,2009Q4,TRAVEL,12-03,DOUGLAS MASSENG
COMMUNICATIONS,2009Q4,SUPPLIES AND MATERIALS,12-17,,,,FRAMING
| COMMUNICATIONS,2009Q4,SUPPLIES AND MATERIALS,12-17,FRAMING,,,
COMMUNICATIONS,2009Q4,SUPPLIES AND MATERIALS,12-17,,,,FRAMING
lukerosiak / strip.py
Created October 7, 2011 05:48
Get rid of fluff on fields in a CSV
"""
Ensure the new and old fields use the same CSV quoting conventions and format decimals the same way (15.00 vs 15 and 16.10 vs 16.1), so we can run a diff without being distracted by those differences.
"""
import csv
fin = csv.reader(open('../../archives/3_csv_original/2011Q3-summary-sunlight.csv','r'))
fout = csv.writer(open('../../archives/3_csv_original/2011Q3-summary-sunlight-stripped.csv','w'))
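The per-field normalization the docstring describes could be sketched as follows (a guess at the stripping rule, keyed to the 15.00-vs-15 examples above):

```python
import re

def strip_field(field):
    """Normalize one CSV field so old and new exports diff cleanly:
    trim whitespace and drop trailing zeros from decimals
    (15.00 -> 15, 16.10 -> 16.1)."""
    field = field.strip()
    if re.match(r'^-?\d+\.\d+$', field):
        field = field.rstrip('0').rstrip('.')
    return field
```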
lukerosiak / flattenfactfinder.py
Created September 20, 2011 23:10
Put columns for multiple geographies of ACS 2010 comparison profiles into more usable file, including combining with annotated file.
import csv
fout = csv.writer( open('cpflat.csv','w') )
def process(i):
fin = csv.reader( open('ACS_10_1YR_CP0%s.csv' % i,'r') )
fin_ann = csv.reader( open('ACS_10_1YR_CP0%s_ann.csv' % i,'r') )
fin.next()
headers = fin.next()[3:]
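The preview ends mid-function. One plausible continuation, written as a pure function over row iterables so it mirrors the header/annotation handling above (this is a guess at the gist's intent, assuming the `_ann` file carries human-readable labels in the same column order):

```python
def flatten(rows, ann_rows):
    """Merge a comparison-profile table with its annotation rows and
    return (geography, column_code, column_label, value) records."""
    rows, ann_rows = iter(rows), iter(ann_rows)
    next(rows)                      # skip the first header row
    headers = next(rows)[3:]        # drop the three geography id columns
    next(ann_rows)
    labels = next(ann_rows)[3:]     # human-readable column labels
    out = []
    for row in rows:
        geo = row[2]                # geography display name
        for header, label, value in zip(headers, labels, row[3:]):
            out.append([geo, header, label, value])
    return out
```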