Skip to content

Instantly share code, notes, and snippets.

@jordanlewis
Created March 8, 2020 00:04
Show Gist options
  • Save jordanlewis/af63f33b269dff8ef71423d89042ec77 to your computer and use it in GitHub Desktop.
Save jordanlewis/af63f33b269dff8ef71423d89042ec77 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Fixed-width record layout: ordered (column name, width in characters) pairs.
# The names are written out as the CSV header, and the widths are consumed
# left to right to slice each input line, so the order of the entries must
# match the order of the fields in the input.
field_to_len = [
    ("PUBSCNUM", 8),
    ("REG", 10),
    ("RACE1V", 2),
    ("SEX", 1),
    ("AGE_DX", 3),
    ("YR_BRTH", 4),
    ("SEQ_NUM", 2),
    ("MDXRECMP", 2),
    ("YEAR_DX", 4),
    ("PRIMSITE", 4),
    ("LATERAL", 1),
    ("HISTO3V", 4),
    ("BEHO3V", 1),
    ("GRADE", 1),
    ("DX_CONF", 1),
    ("REPT_SRC", 1),
    ("RECNOREC", 2),
    ("AGE_1REC", 2),
    ("SITERWHO", 5),
    ("ICCC3WHO", 3),
    ("ICCC3XWHO", 3),
    ("BEHTREND", 1),
    ("HISTREC", 2),
    ("HISTRECB", 2),
    ("RAC_RECA", 1),
    ("RAC_RECY", 1),
    ("ORIGRECB", 1),
    ("FIRSTPRM", 1),
    ("ST_CNTY", 5),
    ("IHSLINK", 1),
    ("AYASITERWHO", 2),
    ("LYMSUBRWHO", 2),
    ("INTPRIM", 1),
    ("BRST_SUB", 1),
    ("SCSSM2KO", 1),
]
def _split_fixed_width(line, fields):
    """Slice one fixed-width input line into its field values.

    Args:
        line: One line of input text, with or without its trailing newline.
        fields: Ordered (name, width) pairs describing the record layout.

    Returns:
        A list of string values, one per field that is at least partly
        present in the line. Fields that start at or beyond the end of the
        line are omitted, so a short line yields fewer values than fields.
    """
    # Drop the line terminator first so it can never end up inside a value.
    record = line.rstrip("\n")
    values = []
    start = 0
    for _, width in fields:
        if start >= len(record):
            # The last field sometimes is not present; stop rather than
            # emit values for fields the line does not contain.
            break
        values.append(record[start:start + width])
        start += width
    return values


def _convert(in_file, out_file, fields):
    """Write a CSV header, then one CSV row per line of in_file, to out_file.

    Args:
        in_file: Iterable of text lines in the fixed-width layout.
        out_file: Object with a write() method that receives the CSV text.
        fields: Ordered (name, width) pairs describing the record layout.
    """
    # Write the CSV header.
    out_file.write(",".join(name for name, _ in fields) + "\n")
    # Transform the input into CSV output, streaming one line at a time.
    for line in in_file:
        out_file.write(",".join(_split_fixed_width(line, fields)) + "\n")


def main(input_path="seer.txt", output_path="seer.csv", fields=None):
    """Convert a fixed-width text file into a comma-separated file.

    Values are written verbatim: no quoting or escaping is applied.

    Args:
        input_path: Path of the fixed-width input file.
        output_path: Path of the CSV file to create or overwrite.
        fields: Ordered (name, width) pairs; defaults to the module-level
            field_to_len table.
    """
    if fields is None:
        fields = field_to_len
    # The context managers guarantee both files are flushed and closed, even
    # if the conversion fails part-way. The input is opened first so that a
    # missing input file does not leave an empty output file behind.
    with open(input_path) as in_file:
        with open(output_path, "w") as out_file:
            _convert(in_file, out_file, fields)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment