Created
September 20, 2011 23:10
-
-
Save lukerosiak/1230705 to your computer and use it in GitHub Desktop.
Put columns for multiple geographies of the ACS 2010 comparison profiles into a more usable file, including combining them with the annotated file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

# Shared CSV writer that every process() call appends its flattened rows to.
# The original mode string was 'wU', but universal-newline mode ('U') is only
# defined for reading and is rejected when combined with 'w'; plain 'w' is the
# writable text mode accepted by both Python 2 and Python 3.
# NOTE(review): the handle is deliberately left open for the life of the
# script because process() keeps writing through this module-level writer.
fout = csv.writer(open('cpflat.csv', 'w'))
def _group_header_runs(header_rows):
    """Collapse consecutive columns with identical headers into runs.

    ``header_rows`` is a list of equally long header lists. Column ``j`` is
    described by the tuple of the ``j``-th entry of every list. Returns a list
    of ``(header_tuple, start, stop)`` where ``start`` is the first column of
    the run and ``stop`` is one past its last column.
    """
    runs = []
    run_start = 0
    previous = None
    width = len(header_rows[0])
    for col in range(width):
        current = tuple(row[col] for row in header_rows)
        if previous is not None and current != previous:
            runs.append((previous, run_start, col))
            run_start = col
        previous = current
    # Flush the last run as well; without this the final group of columns
    # would never be emitted.
    if previous is not None:
        runs.append((previous, run_start, width))
    return runs


def process(i, out=None):
    """Flatten one comparison-profile CSV pair into one row per geography and header group.

    Reads ``ACS_10_1YR_CP0<i>.csv`` and ``ACS_10_1YR_CP0<i>_ann.csv`` from the
    current directory. For every data row and every run of columns sharing the
    same headers, one row is written containing: the profile id, the first
    three columns of the data row, the five header levels (``None`` for levels
    the profile does not have) and nine values taken alternately from the
    plain and the annotated file.

    Parameters:
        i: profile number; 2 and 3 have a fourth header row, 3 also a fifth.
           When ``i == 2`` a column-header row is written before the data.
        out: object with a ``writerow`` method. Defaults to the module-level
           ``fout`` writer.

    Raises:
        StopIteration: if either file has fewer rows than the layout needs,
            or the annotated file has fewer data rows than the plain file.
        IndexError: if a data row is too short for a header run.
    """
    if out is None:
        out = fout

    profile = 'CP0%s' % i
    has_fourth = i in (2, 3)
    has_fifth = i == 3

    with open('ACS_10_1YR_CP0%s.csv' % i, 'r') as est_file, \
            open('ACS_10_1YR_CP0%s_ann.csv' % i, 'r') as ann_file:
        fin = csv.reader(est_file)
        fin_ann = csv.reader(ann_file)

        # Original author's layout note: "CP2 header row starts on line 6;
        # CP3 on line 7; CP4 on line 5", with example header levels
        #   VC03 / HOUSING OCCUPANCY / Total housing units / HC01 / 2010 Estimate
        next(fin)  # first row is not used
        headers = next(fin)[3:]
        headers2 = next(fin)[3:]
        headers3 = next(fin)[3:]
        blank = [None] * len(headers)
        headers4 = next(fin)[3:] if has_fourth else blank
        headers5 = next(fin)[3:] if has_fifth else blank
        next(fin)  # column-series row; read only to advance the reader
        coltype = next(fin)[3:]

        # The annotated file has the same number of leading rows; none of
        # them are used, so just skip past them.
        for _ in range(6 + int(has_fourth) + int(has_fifth)):
            next(fin_ann)

        if i == 2:
            writeheaders = ['cp', 'geo1', 'geo2', 'geo3',
                            'h1', 'h2', 'h3', 'h4', 'h5'] + coltype[:9]
            out.writerow(writeheaders)
            print(writeheaders)
        else:
            print(i)

        hedindexes = _group_header_runs(
            [headers, headers2, headers3, headers4, headers5])

        for line in fin:
            line_ann = next(fin_ann)
            geo = [profile] + line[:3]
            # Compare against the annotated row (the original compared the
            # plain row with itself, so a misalignment could never be seen).
            geo_ann = [profile] + line_ann[:3]
            if geo != geo_ann:
                print('%s  !=  %s' % (geo, geo_ann))

            for hed, start, _stop in hedindexes:
                first = start + 3  # header indexes exclude the 3 leading columns
                # First value comes from the plain file alone; after that the
                # columns alternate plain / annotated.
                vals = [line[first]]
                for offset in (1, 3, 5, 7):
                    vals.append(line[first + offset])
                    vals.append(line_ann[first + offset + 1])
                out.writerow(geo + list(hed) + vals)
# Flatten comparison profiles 2, 3 and 4, in that order (process() writes the
# output column headers only while handling profile 2, so it goes first).
# The original author did not use CP05 but expected it to work here as well.
for i in (2, 3, 4):
    process(i)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment