Skip to content

Instantly share code, notes, and snippets.

@akanik
Created August 12, 2021 15:25
Show Gist options
  • Save akanik/f74218fc854d1e525fa34263d1c42d7a to your computer and use it in GitHub Desktop.
Save akanik/f74218fc854d1e525fa34263d1c42d7a to your computer and use it in GitHub Desktop.
# Directory prefix shared by the 2010 '.pl' data files listed below.
data_dir10 = '../data/2010/tx2010.pl/'

# Paths of the 2010 input files, keyed by a short label.
# 'cols' is the field-names workbook; every other entry lives under
# data_dir10, so those paths are built from the shared prefix.
FILES10 = {
    'cols': '../data/2010/2010_PLSummaryFile_FieldNames.xlsx',
    **{
        label: data_dir10 + filename
        for label, filename in (
            ('geoheader', 'txgeo2010.pl'),
            ('seq01', 'tx000012010.pl'),
            ('seq02', 'tx000022010.pl'),
        )
    },
}
def return_dtypes_dict(df):
    """Return a ``{name: dtype}`` mapping built from a definitions table.

    Args:
        df: Table (e.g. a pandas DataFrame) with a ``'Name'`` column and a
            ``'dtype'`` column, one row per field.

    Returns:
        dict: Each ``'Name'`` value mapped to the ``'dtype'`` value from
        the same row. If a name occurs more than once, the last row wins.
    """
    return dict(zip(df['Name'], df['dtype']))
####################################
## GEOHEADER (FIXED-WIDTH FILE)
####################################
# The 'geo' sheet of the field-names workbook supplies, per field,
# its name ('Name'), its dtype ('dtype') and its width ('Size').
geo_defs10 = pd.read_excel(FILES10['cols'], sheet_name='geo')
geo_widths10 = geo_defs10['Size'].astype(int).tolist()
geo_dtypes10 = return_dtypes_dict(geo_defs10)
geo_cols10 = geo_defs10['Name'].tolist()

# The geoheader file has no header row: it is split into columns by the
# widths above and decoded as latin1.
gh_df10 = pd.read_fwf(
    FILES10['geoheader'],
    widths=geo_widths10,
    names=geo_cols10,
    header=None,
    dtype=geo_dtypes10,
    encoding='latin1',
)

# Narrow slices that keep LOGRECNO next to the human-readable NAME, so
# other tables can be joined to a readable place / county name.
gh_df10_place_hr = gh_df10[['LOGRECNO', 'NAME', 'STATE', 'PLACE']]
gh_df10_cnty_hr = gh_df10[['LOGRECNO', 'NAME', 'STATE', 'COUNTY']]
####################################
## P1 (FILE 'seq01')
####################################
# Field names and dtypes for this file come from the 'p1' sheet of the
# field-names workbook.
p1_defs10 = pd.read_excel(FILES10['cols'], sheet_name='p1')
p1_dtypes10 = return_dtypes_dict(p1_defs10)
p1_cols10 = p1_defs10['Name'].tolist()

# The data file has no header row, so the column names and dtypes are
# passed in explicitly.
p1_df10 = pd.read_csv(
    FILES10['seq01'],
    dtype=p1_dtypes10,
    names=p1_cols10,
    header=None,
)
####################################
## P2 (FILE 'seq02')
####################################
# Field names and dtypes for this file come from the 'p2' sheet of the
# field-names workbook.
p2_defs10 = pd.read_excel(FILES10['cols'], sheet_name='p2')
p2_dtypes10 = return_dtypes_dict(p2_defs10)
p2_cols10 = p2_defs10['Name'].tolist()

# The data file has no header row, so the column names and dtypes are
# passed in explicitly.
p2_df10 = pd.read_csv(
    FILES10['seq02'],
    dtype=p2_dtypes10,
    names=p2_cols10,
    header=None,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment