Skip to content

Instantly share code, notes, and snippets.

@lukerosiak
Created September 20, 2011 23:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lukerosiak/1230705 to your computer and use it in GitHub Desktop.
Save lukerosiak/1230705 to your computer and use it in GitHub Desktop.
Put the columns for multiple geographies of the ACS 2010 comparison profiles into a more usable flat file, including combining them with the annotated file.
import csv
# Shared writer for the flattened output; every process() call appends its
# rows here.
# The file is opened in binary mode because Python 2's csv module does its
# own line-ending handling. The previous 'wU' mode mixed "write" with
# universal-newline *reading*, which is not a valid combination.
# NOTE(review): the underlying file object is never closed explicitly, so
# the final flush relies on interpreter shutdown.
fout = csv.writer(open('cpflat.csv', 'wb'))
def process(i):
    """Flatten one ACS 2010 comparison-profile table into ``fout``.

    Reads ``ACS_10_1YR_CP0<i>.csv`` together with its ``_ann`` companion,
    row by row in lockstep, and writes one output row per
    (geography, header group) pair to the module-level ``fout`` writer.
    Each output row is: table id, the three leading geography columns, the
    five header labels of the group, then nine values taken alternately
    from the data file and the annotation file.

    For table 2 the output header row is written first (and echoed to
    stdout); for any other table only the table number is printed.

    Args:
        i: Table number used to build the input file names. Tables 2 and 3
            carry one extra header row, and table 3 a second extra one.

    Raises:
        IOError: If either input file cannot be opened.
        StopIteration: If a file ends before its header rows, or the
            annotation file has fewer data rows than the data file.
        IndexError: If a row is narrower than the header layout implies.
    """
    # Each header group contributes this many values to an output row.
    values_per_group = 9

    data_path = 'ACS_10_1YR_CP0%s.csv' % i
    ann_path = 'ACS_10_1YR_CP0%s_ann.csv' % i

    with open(data_path, 'r') as data_file, open(ann_path, 'r') as ann_file:
        fin = csv.reader(data_file)
        fin_ann = csv.reader(ann_file)

        # Header layout (first three columns are always geography):
        # the header block is 5 rows deep for CP4, 6 for CP2 and 7 for CP3.
        # Example of one column, top to bottom:
        #   VC03 / HOUSING OCCUPANCY / Total housing units / HC01 / 2010 Estimate
        next(fin)  # first row is not used
        headers = next(fin)[3:]
        headers2 = next(fin)[3:]
        headers3 = next(fin)[3:]

        blank = [None] * len(headers)
        preamble_rows = 6  # 1 skipped + 3 header rows + series row + type row
        if i in (2, 3):
            headers4 = next(fin)[3:]
            preamble_rows += 1
        else:
            headers4 = blank
        if i == 3:
            headers5 = next(fin)[3:]
            preamble_rows += 1
        else:
            headers5 = blank

        next(fin)  # column series codes (e.g. HC01); read only to advance
        coltype = next(fin)[3:]

        # Keep the annotation reader aligned with the data reader.
        for _ in range(preamble_rows):
            next(fin_ann)

        if i == 2:
            writeheaders = (
                ['cp', 'geo1', 'geo2', 'geo3', 'h1', 'h2', 'h3', 'h4', 'h5']
                + coltype[:values_per_group]
            )
            fout.writerow(writeheaders)
            print(writeheaders)
        else:
            print(i)

        # Collapse consecutive columns sharing the same five header labels
        # into (labels, first_column) groups.
        hedindexes = []
        run_key = None
        run_start = 0
        for j, top in enumerate(headers):
            hed = (top, headers2[j], headers3[j], headers4[j], headers5[j])
            if run_key is not None and hed != run_key:
                hedindexes.append((run_key, run_start))
                run_start = j
            run_key = hed
        # The loop above only records a run when the *next* one begins, so
        # the last run has to be flushed here or the final variable of the
        # table would be missing from the output. It is kept only when it is
        # wide enough to supply all nine values, so a short trailing run
        # cannot cause an IndexError below.
        if run_key is not None and len(headers) - run_start >= values_per_group:
            hedindexes.append((run_key, run_start))

        table_id = 'CP0%s' % i
        for line in fin:
            line_ann = next(fin_ann)
            geo = [table_id] + line[:3]
            # Compare against the annotation row (not the data row again) so
            # that misaligned files are actually reported.
            geo_ann = [table_id] + line_ann[:3]
            if geo != geo_ann:
                print('%s  !=  %s' % (geo, geo_ann))

            for hed, start in hedindexes:
                col = start + 3  # skip the three geography columns
                # Positions 0, 1, 3, 5, 7 of the group come from the data
                # file and positions 2, 4, 6, 8 from the annotation file.
                vals = [
                    line[col], line[col + 1], line_ann[col + 2],
                    line[col + 3], line_ann[col + 4],
                    line[col + 5], line_ann[col + 6],
                    line[col + 7], line_ann[col + 8],
                ]
                fout.writerow(geo + list(hed) + vals)
# Flatten comparison-profile tables CP02, CP03 and CP04 in turn.
# CP05 was not used by the author, but it should work in addition to these.
for i in range(2, 5):
    process(i)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment