Last active
December 24, 2023 00:55
-
-
Save mattmc3/824f83b0328172c2da102e52382933ec to your computer and use it in GitHub Desktop.
AWK - convert HIPAA 5010 X12 834 Benefit Enrollment to TSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ISA*01*0000000000*01*0000000000*ZZ*ABCDEFGHIJKLMNO*ZZ*123456789012345*101127*1719*U*00400*000003438*0*P*> | |
GS*PO*4405197800*999999999*20101127*1719*1421*X*004010VICS | |
ST*834*0179 | |
BGN*00*1*20050315*110650****4 | |
REF*38*SAMPLE_POLICY_NUMBER | |
DTP*303*D8*20080321 | |
N1*P5*COMPAN_NAME*FI*000000000 | |
INS*Y*18*030*20*A | |
REF*0F*SUBSCRIBER_NUMBER | |
NM1*IL*1*JOHN DOE*R***34*1*0000000 | |
PER*IP**HP*2138051111 | |
N3*123 SAMPLE RD | |
N4*CITY*ST*12345 | |
DMG*D8*19690101 *F | |
HD*030 | |
DTP*348*D8*20080101 | |
REF*1L*INDIV_POLICY_NO | |
SE*16*0179 | |
GE*1*1421 | |
IEA*1*000003438 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This version is a simple bash one-liner to split an x12 file into parts | |
# {$1=$1} is a trick to tell AWK to re-evaluate $0, meaning OFS gets used | |
# prefixing FNR for the line number ensures that we can piece the file back together | |
# clean up with: `find . -type f -name '*.edi.part' -delete` | |
awk '{$1=$1}{print FNR,$0 > $1".edi.part"}' FS="*" OFS="\t" ./test834.edi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env awk -f | |
# author: mattmc3 | |
# file: x12_834.awk | |
# homepage: https://gist.github.com/mattmc3/824f83b0328172c2da102e52382933ec | |
# | |
# This sample 834 parser shows how to generically parse into header, record, | |
# and trailer concepts, and shows you how to identify loops, though it doesn't | |
# use them for anything. | |
# This is only meant to showcase one possible way of parsing EDI files with awk. | |
BEGIN { | |
FS="*" | |
# if you don't want tab delimited output, change this | |
OFS="\t" | |
# if the record separator isn't a newline (say a tilde) then change this | |
RS="\n" | |
count=0 | |
} | |
function print_header() { | |
print "ISA_07", \ | |
"INS_02", "INS_05", \ | |
"NM1_03", "NM1_04", "NM1_05", "NM1_06", "NM1_07", "NM1_08", "NM1_09", \ | |
"DMG_02", "DMG_03", \ | |
"REF_DX_02", "REF_0F_02", "REF_17_02", "REF_1L_02", "REF_23_02", "REF_ZZ_02", \ | |
"N3_01", "N3_02", "N4_01", "N4_02", "N4_03", \ | |
"PER_TE", "PER_HP", "PER_WP", "PER_EM", \ | |
"HD_HLT_01", "HD_HLT_03", "HD_HLT_04", "HD_HLT_05", \ | |
"HD_PDG_01", "HD_PDG_03", "HD_PDG_04", "HD_PDG_05", \ | |
"DTP_348_03", \ | |
"DTP_349_03" | |
} | |
function print_record() { | |
print HDR["ISA_07"], \ | |
REC["INS_02"], REC["INS_05"], \ | |
REC["NM1_03"], REC["NM1_04"], REC["NM1_05"], REC["NM1_06"], REC["NM1_07"], REC["NM1_08"], REC["NM1_09"], \ | |
REC["DMG_02"], REC["DMG_03"], | |
REC["REF_DX_02"], REC["REF_0F_02"], REC["REF_17_02"], REC["REF_1L_02"], REC["REF_23_02"], REC["REF_ZZ_02"], \ | |
REC["N3_01"], REC["N3_02"], \ | |
REC["N4_01"], REC["N4_02"], REC["N4_03"], \ | |
REC["PER_TE"], REC["PER_HP"], REC["PER_WP"], REC["PER_EM"], \ | |
REC["HD_HLT_01"], REC["HD_HLT_03"], REC["HD_HLT_04"], REC["HD_HLT_05"], \ | |
REC["HD_PDG_01"], REC["HD_PDG_03"], REC["HD_PDG_04"], REC["HD_PDG_05"], \ | |
REC["DTP_348_03"], \ | |
REC["DTP_349_03"] | |
# clear out record array for the next run | |
delete REC | |
} | |
function handle_new_record() { | |
count+=1 | |
if ( count == 1 ) { print_header() } | |
else { print_record() } | |
} | |
/^INS\*/ { handle_new_record() } | |
{ gsub(/~$/,"") } # remove trailing ~tilde~ | |
# define sections, subparts, and loops if you need those | |
/^ISA\*/ { section = "header" } | |
/^INS\*/ { section = "record" } | |
/^SE|GE|IEA\*/ { section = "trailer" } | |
{ subpart = 0 } | |
/^DTP|REF\*/ { subpart = 2 } | |
/^HD\*/ { subpart = 4 } | |
/^N1\*P5\*/ { loop_id = "1000A" } | |
/^N1\*IN\*/ { loop_id = "1000B" } | |
/^N1\*TV\*/ { loop_id = "1000C" } | |
/^ACT\*/ { loop_id = "1100C" } | |
/^INS\*/ { loop_id = "2000" } | |
/^NM1\*(74|IL)\*/ { loop_id = "2100A" } | |
/^NM1\*70\*/ { loop_id = "2100B" } | |
/^NM1\*31\*/ { loop_id = "2100C" } | |
/^NM1\*ES\*/ { loop_id = "2100D" } | |
/^NM1\*M8\*/ { loop_id = "2100E" } | |
/^NM1\*S3\*/ { loop_id = "2100F" } | |
/^NM1\*(E1|EI|EXS|GD|J6|QD)\*/ { loop_id = "2100G" } | |
/^DSB\*/ { loop_id = "2200" } | |
/^HD\*/ { loop_id = "2300" } | |
/^LX\*/ { loop_id = "2310" } | |
/^COB\*/ { loop_id = "2320" } | |
/^SE\*/ { loop_id = "" } | |
# handle setting segment data | |
/^[A-Z]+\*/ { | |
prefix = $1 | |
if ( subpart > 0 ) { | |
prefix = prefix "_" $subpart | |
} | |
for (i = 1; i <= NF; i++) { | |
keyname = prefix "_" sprintf("%02d", i) | |
value = $i | |
if ( section == "header" ) { HDR[keyname] = value } | |
else if ( section == "record" ) { REC[keyname] = value } | |
else if ( section == "trailer" ) { TRL[keyname] = value } | |
} | |
} | |
# print final record | |
END { print_record() } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env awk -f | |
# author: mattmc3 | |
# file: x12_834_extended | |
# homepage: https://gist.github.com/mattmc3/824f83b0328172c2da102e52382933ec | |
# | |
# This sample 834 parser shows how you can use the INS segment to start a new | |
# record, and can flatten and pull out select fields from an x12 834 EDI benefit | |
# enrollment file. This is only meant to showcase one way of parsing EDI files | |
# with awk, but there are many other good ways to handle EDI files too. | |
BEGIN { | |
FS="*" | |
# if you don't want tab delimited output, change this | |
OFS="\t" | |
# if the record separator isn't a newline (say a tilde) then change this | |
RS="\n" | |
count=0 | |
} | |
function print_header() { | |
print "INS_02_INDIVIDUAL_RELATIONSHIP_CODE", \ | |
"INS_05_BENEFIT_STATUS_CODE", \ | |
"NM1_03_NAME_LAST_OR_ORG_NAME", \ | |
"NM1_04_NAME_FIRST", \ | |
"NM1_05_NAME_MIDDLE", \ | |
"NM1_06_NAME_PREFIX", \ | |
"NM1_07_NAME_SUFFIX", \ | |
"NM1_08_ID_CODE_QUALIFIER", \ | |
"NM1_09_ID_CODE", \ | |
"DMG_02_DATE_TIME_PERIOD", \ | |
"DMG_03_GENDER_CODE", \ | |
"REF_DX_02_REFERENCE_IDENTIFICATION", \ | |
"REF_0F_02_REFERENCE_IDENTIFICATION", \ | |
"REF_17_02_REFERENCE_IDENTIFICATION", \ | |
"REF_1L_02_REFERENCE_IDENTIFICATION", \ | |
"REF_23_02_REFERENCE_IDENTIFICATION", \ | |
"REF_ZZ_02_REFERENCE_IDENTIFICATION", \ | |
"N3_01_ADDRESS_INFORMATION", \ | |
"N3_02_ADDRESS_INFORMATION", \ | |
"N4_01_CITY_NAME", \ | |
"N4_02_STATE_OR_PROV_CODE", \ | |
"N4_03_POSTAL_CODE", \ | |
"PER_TE_COMMUNICATION_NUMBER", \ | |
"PER_HP_COMMUNICATION_NUMBER", \ | |
"PER_WP_COMMUNICATION_NUMBER", \ | |
"PER_EM_COMMUNICATION_NUMBER", \ | |
"HD_HLT_01_MAINTENANCE_TYPE_CODE", \ | |
"HD_HLT_03_INSURANCE_LINE_CODE", \ | |
"HD_HLT_04_PLAN_CVRG_DESCRIPTION", \ | |
"HD_HLT_05_COVERAGE_LEVEL_CODE", \ | |
"HD_PDG_01_MAINTENANCE_TYPE_CODE", \ | |
"HD_PDG_03_INSURANCE_LINE_CODE", \ | |
"HD_PDG_04_PLAN_CVRG_DESCRIPTION", \ | |
"HD_PDG_05_COVERAGE_LEVEL_CODE", \ | |
"DTP_348_03_DATE_TIME_PERIOD", \ | |
"DTP_349_03_DATE_TIME_PERIOD" | |
} | |
function print_record() { | |
print INS["02_INDIVIDUAL_RELATIONSHIP_CODE"], \ | |
INS["05_BENEFIT_STATUS_CODE"], \ | |
NM1["03_NAME_LAST_OR_ORG_NAME"], \ | |
NM1["04_NAME_FIRST"], \ | |
NM1["05_NAME_MIDDLE"], \ | |
NM1["06_NAME_PREFIX"], \ | |
NM1["07_NAME_SUFFIX"], \ | |
NM1["08_ID_CODE_QUALIFIER"], \ | |
NM1["09_ID_CODE"], \ | |
DMG["02_DATE_TIME_PERIOD"], \ | |
DMG["03_GENDER_CODE"], \ | |
REF["DX_02_REFERENCE_IDENTIFICATION"], \ | |
REF["0F_02_REFERENCE_IDENTIFICATION"], \ | |
REF["17_02_REFERENCE_IDENTIFICATION"], \ | |
REF["1L_02_REFERENCE_IDENTIFICATION"], \ | |
REF["23_02_REFERENCE_IDENTIFICATION"], \ | |
REF["ZZ_02_REFERENCE_IDENTIFICATION"], \ | |
N3["01_ADDRESS_INFORMATION"], \ | |
N3["02_ADDRESS_INFORMATION"], \ | |
N4["01_CITY_NAME"], \ | |
N4["02_STATE_OR_PROV_CODE"], \ | |
N4["03_POSTAL_CODE"], \ | |
PER["TE_COMMUNICATION_NUMBER"], \ | |
PER["HP_COMMUNICATION_NUMBER"], \ | |
PER["WP_COMMUNICATION_NUMBER"], \ | |
PER["EM_COMMUNICATION_NUMBER"], \ | |
HD["HLT_01_MAINTENANCE_TYPE_CODE"], \ | |
HD["HLT_03_INSURANCE_LINE_CODE"], \ | |
HD["HLT_04_PLAN_CVRG_DESCRIPTION"], \ | |
HD["HLT_05_COVERAGE_LEVEL_CODE"], \ | |
HD["PDG_01_MAINTENANCE_TYPE_CODE"], \ | |
HD["PDG_03_INSURANCE_LINE_CODE"], \ | |
HD["PDG_04_PLAN_CVRG_DESCRIPTION"], \ | |
HD["PDG_05_COVERAGE_LEVEL_CODE"], \ | |
DTP["348_03_DATE_TIME_PERIOD"], \ | |
DTP["349_03_DATE_TIME_PERIOD"] | |
# clear out record arrays for the next run | |
delete DMG | |
delete DTP | |
delete HD | |
delete INS | |
delete N3 | |
delete N4 | |
delete NM1 | |
delete PER | |
delete REF | |
} | |
function handle_new_record() { | |
count+=1 | |
if ( count == 1 ) { | |
print_header() | |
} | |
else { | |
print_record() | |
} | |
} | |
{ | |
# ditch any trailing ~tilde~ for all records | |
gsub(/~$/,"") | |
} | |
/^DMG\*/ { | |
# DMG - Demographic Information | |
# MEMBER DEMOGRAPHICS | |
DMG["01_DATE_TIME_FORMAT_QUALIFIER"] = $2 | |
DMG["02_DATE_TIME_PERIOD"] = $3 | |
DMG["03_GENDER_CODE"] = $4 | |
DMG["04_MARITAL_STATUS_CODE"] = $5 | |
DMG["05_RACE_OR_ETHNIC_CODE"] = $6 | |
DMG["06_CITIZENSHIP_STATUS_CODE"] = $7 | |
} | |
/^DTP\*/ { | |
# DTP - Date or Time or Period | |
# MEMBER LEVEL DATES | |
# 336/337 - employment | |
# 338/339 - medicare | |
# 340/341 - COBRA | |
# 344/345 - COB | |
# 348/349 - health coverage | |
# 356/357 - eligibility | |
# 360/361 - disability | |
DTP[$2 "_01_DATE_TIME_QUALIFIER"] = $2 | |
DTP[$2 "_02_DATE_TIME_PERIOD_FORMAT_QUALIFIER"] = $3 | |
DTP[$2 "_03_DATE_TIME_PERIOD"] = $4 | |
} | |
/^HD\*/ { | |
# HD - Health Coverage | |
HD[$4 "_01_MAINTENANCE_TYPE_CODE"] = $2 | |
HD[$4 "_03_INSURANCE_LINE_CODE"] = $4 | |
HD[$4 "_04_PLAN_CVRG_DESCRIPTION"] = $5 | |
HD[$4 "_05_COVERAGE_LEVEL_CODE"] = $6 | |
} | |
/^INS\*/ { | |
# INS - Insured Benefit | |
# MEMBER LEVEL DETAIL | |
handle_new_record() | |
INS["01_YES_NO_CONDITION_OR_RESPONSE_CODE"] = $2 | |
INS["02_INDIVIDUAL_RELATIONSHIP_CODE"] = $3 | |
INS["03_MAINTENANCE_TYPE_CODE"] = $4 | |
INS["04_MAINTENANCE_REASON_CODE"] = $5 | |
INS["05_BENEFIT_STATUS_CODE"] = $6 | |
INS["06_MEDICARE_PLAN_CODE"] = $7 | |
INS["07_COBRA_QUALIFYING"] = $8 | |
INS["08_EMPLOYMENT_STATUS_CODE" ] = $9 | |
INS["09_STUDENT_STATUS_CODE"] = $10 | |
INS["10_YES_NO_CONDITION_OR_RESPONSE_CODE"] = $11 | |
INS["11_DATE_TIME_PERIOD_FORMAT_QUALIFIER"] = $12 | |
INS["12_DATE_TIME_PERIOD"] = $13 | |
} | |
/^N3\*/ { | |
# N3 - Address Information | |
# MEMBER RESIDENCE STREET ADDRESS | |
N3["01_ADDRESS_INFORMATION"] = $2 | |
N3["02_ADDRESS_INFORMATION"] = $3 | |
} | |
/^N4\*/ { | |
# N4 - Geographic Location | |
# MEMBER RESIDENCE CITY, STATE, ZIP CODE | |
N4["01_CITY_NAME"] = $2 | |
N4["02_STATE_OR_PROV_CODE"] = $3 | |
N4["03_POSTAL_CODE"] = $4 | |
N4["04_COUNTRY_CODE"] = $5 | |
N4["05_LOCATION_QUALIFIER"] = $6 | |
N4["06_LOCATION_IDENTIFIER"] = $7 | |
} | |
/^NM1\*/ { | |
# NM1 - Individual or Organizational Name | |
# MEMBER NAME | |
NM1["01_ENTITY_ID_CODE"] = $2 | |
NM1["02_ENTITY_TYPE_QUALIFIER"] = $3 | |
NM1["03_NAME_LAST_OR_ORG_NAME"] = $4 | |
NM1["04_NAME_FIRST"] = $5 | |
NM1["05_NAME_MIDDLE"] = $6 | |
NM1["06_NAME_PREFIX"] = $7 | |
NM1["07_NAME_SUFFIX"] = $8 | |
NM1["08_ID_CODE_QUALIFIER"] = $9 | |
NM1["09_ID_CODE"] = $10 | |
} | |
/^PER\*/ { | |
# PER - Administrative Communications Contact | |
# MEMBER COMMUNICATIONS NUMBERS | |
if (length($4) > 0 && length($5) > 0) { | |
PER[$4 "_01_CONTACT_FUNCTION_CODE"] = $2 | |
PER[$4 "_02_NAME"] = $3 | |
PER[$4 "_ORDINAL"] = "1" | |
PER[$4 "_COMMUNICATION_NUMBER_QUALIFIER"] = $4 | |
PER[$4 "_COMMUNICATION_NUMBER"] = $5 | |
} | |
if (length($6) > 0 && length($7) > 0) { | |
PER[$6 "_01_CONTACT_FUNCTION_CODE"] = $2 | |
PER[$6 "_02_NAME"] = $3 | |
PER[$6 "_ORDINAL"] = "2" | |
PER[$6 "_COMMUNICATION_NUMBER_QUALIFIER"] = $6 | |
PER[$6 "_COMMUNICATION_NUMBER"] = $7 | |
} | |
if (length($8) > 0 && length($9) > 0) { | |
PER[$8 "_01_CONTACT_FUNCTION_CODE"] = $2 | |
PER[$8 "_02_NAME"] = $3 | |
PER[$8 "_ORDINAL"] = "2" | |
PER[$8 "_COMMUNICATION_NUMBER_QUALIFIER"] = $8 | |
PER[$8 "_COMMUNICATION_NUMBER"] = $9 | |
} | |
} | |
/^REF\*/ { | |
# REF - Reference Identification | |
# 38: Master Policy Number | |
# DX: Department/Agency Number | |
# 0F: Subscriber Number | |
# 1L: Group or Policy Number | |
# 17: Client Reporting Category | |
# 23: Client Number | |
# 3H: Case Number | |
# F6: Health Insurance Claim (HIC) Number | |
# ZZ: Mutually Defined | |
REF[$2 "_01_REFERENCE_IDENTIFICATION_QUALIFIER"] = $2 | |
REF[$2 "_02_REFERENCE_IDENTIFICATION"] = $3 | |
} | |
END { | |
# print final record | |
print_record() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment