Skip to content

Instantly share code, notes, and snippets.

@mrkatey
Last active October 11, 2022 14:22
Show Gist options
  • Save mrkatey/e058d807fdd09566fa91b6be491ad414 to your computer and use it in GitHub Desktop.
Save mrkatey/e058d807fdd09566fa91b6be491ad414 to your computer and use it in GitHub Desktop.
Convert SSDI to CSV
import csv
from dataclasses import field
# Fixed-width record layout consumed by split_using_key().
#
# Each entry maps a field id (a string, in column order) to
# [positions, column title].  `positions` is either a single 0-based
# character index ([i]) or a pair of slice bounds ([start, stop]) that is
# applied as line[start:stop].
#
# NOTE(review): most adjacent ranges share one index (e.g. [2, 11] and
# [10, 31] both cover index 10, because Python slices exclude `stop`).
# Confirm these offsets against the record layout of the input file.
key = {
    "0": [[0], '(A)dd/(C)hange/(D)elete'],
    "1": [[2, 11], 'SS Number'],
    "2": [[10, 31], 'Last name'],
    "3": [[30, 35], 'Name Suffix'],
    "4": [[34, 50], 'First Name'],
    "5": [[49, 65], 'Middle Name'],
    "6": [[64], '(V)erified/(P)roof'],
    # Fields "7" and "8" are reformatted with "/" separators by
    # split_using_key().
    "7": [[65, 73], 'Date of Death (mmddccyy)'],
    "8": [[73, 82], 'Date of Birth (mmddccyy)'],
    "9": [[81, 84], 'Blanks'],
    "10": [[83, 89], 'Blanks'],
    "11": [[88, 94], 'Blanks'],
    "12": [[93, 101], 'Blanks'],
}
def split_using_key(line, layout=None):
    """Split one fixed-width record into a list of field strings.

    Args:
        line: A single record. A trailing line terminator ("\\n" or
            "\\r\\n") is removed before slicing so it cannot end up inside
            a field value.
        layout: Optional mapping of field id -> [positions, title], in the
            same shape as the module-level ``key`` table. ``positions`` is
            ``[index]`` for a single character or ``[start, stop]`` for a
            slice. Defaults to ``key``.

    Returns:
        One string per layout entry, in layout order. Fields with id "7"
        or "8" are treated as dates and returned as
        "<first 2 chars>/<next 2 chars>/<remainder>". Positions past the
        end of a short record yield empty strings rather than raising.
    """
    if layout is None:
        layout = key

    # Lines read from a text file keep their terminator; strip it so it is
    # not captured by a field whose range reaches the end of the line.
    record = line.rstrip("\r\n")

    clean_data = []
    for field_id, (positions, _title) in layout.items():
        if field_id in ("7", "8"):
            # Date fields: insert "/" between month, day and year parts.
            start, stop = positions
            date = record[start:stop]
            clean_data.append("/".join([date[0:2], date[2:4], date[4:]]))
        elif len(positions) == 1:
            start = positions[0]
            # Slice instead of indexing so a short or blank record gives ''
            # rather than raising IndexError.
            clean_data.append(record[start:start + 1])
        elif len(positions) == 2:
            start, stop = positions
            clean_data.append(record[start:stop])
    return clean_data
# Input file to convert; the CSV is written next to it as
# "<FILENAME>_processed.csv".
FILENAME = 'ssdm1'

# newline='' is required by the csv module so it can control line endings
# itself (otherwise extra blank rows appear on platforms that translate
# "\n" to "\r\n").
with open(f'{FILENAME}_processed.csv', 'w', encoding='utf-8', newline='') as outfile:
    fields = [a[1] for a in key.values()]
    # Write rows positionally: several columns share the title 'Blanks', so
    # a dict keyed by title would collapse them into a single value.
    writer = csv.writer(outfile)
    writer.writerow(fields)
    with open(FILENAME, 'r', encoding='utf-8') as f:
        # Iterating the file streams it line by line and stops at EOF; the
        # previous `while True` / readlines() loop never terminated.
        for line in f:
            writer.writerow(split_using_key(line))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment