Skip to content

Instantly share code, notes, and snippets.

@mattmc3
Last active December 24, 2023 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mattmc3/824f83b0328172c2da102e52382933ec to your computer and use it in GitHub Desktop.
Save mattmc3/824f83b0328172c2da102e52382933ec to your computer and use it in GitHub Desktop.
AWK - convert HIPAA 5010 X12 834 Benefit Enrollment to TSV
ISA*01*0000000000*01*0000000000*ZZ*ABCDEFGHIJKLMNO*ZZ*123456789012345*101127*1719*U*00400*000003438*0*P*>
GS*PO*4405197800*999999999*20101127*1719*1421*X*004010VICS
ST*834*0179
BGN*00*1*20050315*110650****4
REF*38*SAMPLE_POLICY_NUMBER
DTP*303*D8*20080321
N1*P5*COMPAN_NAME*FI*000000000
INS*Y*18*030*20*A
REF*0F*SUBSCRIBER_NUMBER
NM1*IL*1*JOHN DOE*R***34*1*0000000
PER*IP**HP*2138051111
N3*123 SAMPLE RD
N4*CITY*ST*12345
DMG*D8*19690101 *F
HD*030
DTP*348*D8*20080101
REF*1L*INDIV_POLICY_NO
SE*16*0179
GE*1*1421
IEA*1*000003438
# This version is a simple bash one-liner to split an x12 file into parts
# {$1=$1} is a trick to tell AWK to re-evaluate $0, meaning OFS gets used
# prefixing FNR for the line number ensures that we can piece the file back together
# clean up with: `find . -type f -name '*.edi.part' -delete`
awk '{$1=$1}{print FNR,$0 > $1".edi.part"}' FS="*" OFS="\t" ./test834.edi
#!/usr/bin/env awk -f
# author: mattmc3
# file: x12_834.awk
# homepage: https://gist.github.com/mattmc3/824f83b0328172c2da102e52382933ec
#
# This sample 834 parser shows how to generically parse into header, record,
# and trailer concepts, and shows you how to identify loops, though it doesn't
# use them for anything.
# This is only meant to showcase one possible way of parsing EDI files with awk.
BEGIN {
FS="*"
# if you don't want tab delimited output, change this
OFS="\t"
# if the record separator isn't a newline (say a tilde) then change this
RS="\n"
count=0
}
function print_header() {
print "ISA_07", \
"INS_02", "INS_05", \
"NM1_03", "NM1_04", "NM1_05", "NM1_06", "NM1_07", "NM1_08", "NM1_09", \
"DMG_02", "DMG_03", \
"REF_DX_02", "REF_0F_02", "REF_17_02", "REF_1L_02", "REF_23_02", "REF_ZZ_02", \
"N3_01", "N3_02", "N4_01", "N4_02", "N4_03", \
"PER_TE", "PER_HP", "PER_WP", "PER_EM", \
"HD_HLT_01", "HD_HLT_03", "HD_HLT_04", "HD_HLT_05", \
"HD_PDG_01", "HD_PDG_03", "HD_PDG_04", "HD_PDG_05", \
"DTP_348_03", \
"DTP_349_03"
}
function print_record() {
print HDR["ISA_07"], \
REC["INS_02"], REC["INS_05"], \
REC["NM1_03"], REC["NM1_04"], REC["NM1_05"], REC["NM1_06"], REC["NM1_07"], REC["NM1_08"], REC["NM1_09"], \
REC["DMG_02"], REC["DMG_03"],
REC["REF_DX_02"], REC["REF_0F_02"], REC["REF_17_02"], REC["REF_1L_02"], REC["REF_23_02"], REC["REF_ZZ_02"], \
REC["N3_01"], REC["N3_02"], \
REC["N4_01"], REC["N4_02"], REC["N4_03"], \
REC["PER_TE"], REC["PER_HP"], REC["PER_WP"], REC["PER_EM"], \
REC["HD_HLT_01"], REC["HD_HLT_03"], REC["HD_HLT_04"], REC["HD_HLT_05"], \
REC["HD_PDG_01"], REC["HD_PDG_03"], REC["HD_PDG_04"], REC["HD_PDG_05"], \
REC["DTP_348_03"], \
REC["DTP_349_03"]
# clear out record array for the next run
delete REC
}
function handle_new_record() {
count+=1
if ( count == 1 ) { print_header() }
else { print_record() }
}
/^INS\*/ { handle_new_record() }
{ gsub(/~$/,"") } # remove trailing ~tilde~
# define sections, subparts, and loops if you need those
/^ISA\*/ { section = "header" }
/^INS\*/ { section = "record" }
/^SE|GE|IEA\*/ { section = "trailer" }
{ subpart = 0 }
/^DTP|REF\*/ { subpart = 2 }
/^HD\*/ { subpart = 4 }
/^N1\*P5\*/ { loop_id = "1000A" }
/^N1\*IN\*/ { loop_id = "1000B" }
/^N1\*TV\*/ { loop_id = "1000C" }
/^ACT\*/ { loop_id = "1100C" }
/^INS\*/ { loop_id = "2000" }
/^NM1\*(74|IL)\*/ { loop_id = "2100A" }
/^NM1\*70\*/ { loop_id = "2100B" }
/^NM1\*31\*/ { loop_id = "2100C" }
/^NM1\*ES\*/ { loop_id = "2100D" }
/^NM1\*M8\*/ { loop_id = "2100E" }
/^NM1\*S3\*/ { loop_id = "2100F" }
/^NM1\*(E1|EI|EXS|GD|J6|QD)\*/ { loop_id = "2100G" }
/^DSB\*/ { loop_id = "2200" }
/^HD\*/ { loop_id = "2300" }
/^LX\*/ { loop_id = "2310" }
/^COB\*/ { loop_id = "2320" }
/^SE\*/ { loop_id = "" }
# handle setting segment data
/^[A-Z]+\*/ {
prefix = $1
if ( subpart > 0 ) {
prefix = prefix "_" $subpart
}
for (i = 1; i <= NF; i++) {
keyname = prefix "_" sprintf("%02d", i)
value = $i
if ( section == "header" ) { HDR[keyname] = value }
else if ( section == "record" ) { REC[keyname] = value }
else if ( section == "trailer" ) { TRL[keyname] = value }
}
}
# print final record
END { print_record() }
#!/usr/bin/env awk -f
# author: mattmc3
# file: x12_834_extended
# homepage: https://gist.github.com/mattmc3/824f83b0328172c2da102e52382933ec
#
# This sample 834 parser shows how you can use the INS segment to start a new
# record, and can flatten and pull out select fields from an x12 834 EDI benefit
# enrollment file. This is only meant to showcase one way of parsing EDI files
# with awk, but there are many other good ways to handle EDI files too.
BEGIN {
FS="*"
# if you don't want tab delimited output, change this
OFS="\t"
# if the record separator isn't a newline (say a tilde) then change this
RS="\n"
count=0
}
function print_header() {
print "INS_02_INDIVIDUAL_RELATIONSHIP_CODE", \
"INS_05_BENEFIT_STATUS_CODE", \
"NM1_03_NAME_LAST_OR_ORG_NAME", \
"NM1_04_NAME_FIRST", \
"NM1_05_NAME_MIDDLE", \
"NM1_06_NAME_PREFIX", \
"NM1_07_NAME_SUFFIX", \
"NM1_08_ID_CODE_QUALIFIER", \
"NM1_09_ID_CODE", \
"DMG_02_DATE_TIME_PERIOD", \
"DMG_03_GENDER_CODE", \
"REF_DX_02_REFERENCE_IDENTIFICATION", \
"REF_0F_02_REFERENCE_IDENTIFICATION", \
"REF_17_02_REFERENCE_IDENTIFICATION", \
"REF_1L_02_REFERENCE_IDENTIFICATION", \
"REF_23_02_REFERENCE_IDENTIFICATION", \
"REF_ZZ_02_REFERENCE_IDENTIFICATION", \
"N3_01_ADDRESS_INFORMATION", \
"N3_02_ADDRESS_INFORMATION", \
"N4_01_CITY_NAME", \
"N4_02_STATE_OR_PROV_CODE", \
"N4_03_POSTAL_CODE", \
"PER_TE_COMMUNICATION_NUMBER", \
"PER_HP_COMMUNICATION_NUMBER", \
"PER_WP_COMMUNICATION_NUMBER", \
"PER_EM_COMMUNICATION_NUMBER", \
"HD_HLT_01_MAINTENANCE_TYPE_CODE", \
"HD_HLT_03_INSURANCE_LINE_CODE", \
"HD_HLT_04_PLAN_CVRG_DESCRIPTION", \
"HD_HLT_05_COVERAGE_LEVEL_CODE", \
"HD_PDG_01_MAINTENANCE_TYPE_CODE", \
"HD_PDG_03_INSURANCE_LINE_CODE", \
"HD_PDG_04_PLAN_CVRG_DESCRIPTION", \
"HD_PDG_05_COVERAGE_LEVEL_CODE", \
"DTP_348_03_DATE_TIME_PERIOD", \
"DTP_349_03_DATE_TIME_PERIOD"
}
function print_record() {
print INS["02_INDIVIDUAL_RELATIONSHIP_CODE"], \
INS["05_BENEFIT_STATUS_CODE"], \
NM1["03_NAME_LAST_OR_ORG_NAME"], \
NM1["04_NAME_FIRST"], \
NM1["05_NAME_MIDDLE"], \
NM1["06_NAME_PREFIX"], \
NM1["07_NAME_SUFFIX"], \
NM1["08_ID_CODE_QUALIFIER"], \
NM1["09_ID_CODE"], \
DMG["02_DATE_TIME_PERIOD"], \
DMG["03_GENDER_CODE"], \
REF["DX_02_REFERENCE_IDENTIFICATION"], \
REF["0F_02_REFERENCE_IDENTIFICATION"], \
REF["17_02_REFERENCE_IDENTIFICATION"], \
REF["1L_02_REFERENCE_IDENTIFICATION"], \
REF["23_02_REFERENCE_IDENTIFICATION"], \
REF["ZZ_02_REFERENCE_IDENTIFICATION"], \
N3["01_ADDRESS_INFORMATION"], \
N3["02_ADDRESS_INFORMATION"], \
N4["01_CITY_NAME"], \
N4["02_STATE_OR_PROV_CODE"], \
N4["03_POSTAL_CODE"], \
PER["TE_COMMUNICATION_NUMBER"], \
PER["HP_COMMUNICATION_NUMBER"], \
PER["WP_COMMUNICATION_NUMBER"], \
PER["EM_COMMUNICATION_NUMBER"], \
HD["HLT_01_MAINTENANCE_TYPE_CODE"], \
HD["HLT_03_INSURANCE_LINE_CODE"], \
HD["HLT_04_PLAN_CVRG_DESCRIPTION"], \
HD["HLT_05_COVERAGE_LEVEL_CODE"], \
HD["PDG_01_MAINTENANCE_TYPE_CODE"], \
HD["PDG_03_INSURANCE_LINE_CODE"], \
HD["PDG_04_PLAN_CVRG_DESCRIPTION"], \
HD["PDG_05_COVERAGE_LEVEL_CODE"], \
DTP["348_03_DATE_TIME_PERIOD"], \
DTP["349_03_DATE_TIME_PERIOD"]
# clear out record arrays for the next run
delete DMG
delete DTP
delete HD
delete INS
delete N3
delete N4
delete NM1
delete PER
delete REF
}
function handle_new_record() {
count+=1
if ( count == 1 ) {
print_header()
}
else {
print_record()
}
}
{
# ditch any trailing ~tilde~ for all records
gsub(/~$/,"")
}
/^DMG\*/ {
# DMG - Demographic Information
# MEMBER DEMOGRAPHICS
DMG["01_DATE_TIME_FORMAT_QUALIFIER"] = $2
DMG["02_DATE_TIME_PERIOD"] = $3
DMG["03_GENDER_CODE"] = $4
DMG["04_MARITAL_STATUS_CODE"] = $5
DMG["05_RACE_OR_ETHNIC_CODE"] = $6
DMG["06_CITIZENSHIP_STATUS_CODE"] = $7
}
/^DTP\*/ {
# DTP - Date or Time or Period
# MEMBER LEVEL DATES
# 336/337 - employment
# 338/339 - medicare
# 340/341 - COBRA
# 344/345 - COB
# 348/349 - health coverage
# 356/357 - eligibility
# 360/361 - disability
DTP[$2 "_01_DATE_TIME_QUALIFIER"] = $2
DTP[$2 "_02_DATE_TIME_PERIOD_FORMAT_QUALIFIER"] = $3
DTP[$2 "_03_DATE_TIME_PERIOD"] = $4
}
/^HD\*/ {
# HD - Health Coverage
HD[$4 "_01_MAINTENANCE_TYPE_CODE"] = $2
HD[$4 "_03_INSURANCE_LINE_CODE"] = $4
HD[$4 "_04_PLAN_CVRG_DESCRIPTION"] = $5
HD[$4 "_05_COVERAGE_LEVEL_CODE"] = $6
}
/^INS\*/ {
# INS - Insured Benefit
# MEMBER LEVEL DETAIL
handle_new_record()
INS["01_YES_NO_CONDITION_OR_RESPONSE_CODE"] = $2
INS["02_INDIVIDUAL_RELATIONSHIP_CODE"] = $3
INS["03_MAINTENANCE_TYPE_CODE"] = $4
INS["04_MAINTENANCE_REASON_CODE"] = $5
INS["05_BENEFIT_STATUS_CODE"] = $6
INS["06_MEDICARE_PLAN_CODE"] = $7
INS["07_COBRA_QUALIFYING"] = $8
INS["08_EMPLOYMENT_STATUS_CODE" ] = $9
INS["09_STUDENT_STATUS_CODE"] = $10
INS["10_YES_NO_CONDITION_OR_RESPONSE_CODE"] = $11
INS["11_DATE_TIME_PERIOD_FORMAT_QUALIFIER"] = $12
INS["12_DATE_TIME_PERIOD"] = $13
}
/^N3\*/ {
# N3 - Address Information
# MEMBER RESIDENCE STREET ADDRESS
N3["01_ADDRESS_INFORMATION"] = $2
N3["02_ADDRESS_INFORMATION"] = $3
}
/^N4\*/ {
# N4 - Geographic Location
# MEMBER RESIDENCE CITY, STATE, ZIP CODE
N4["01_CITY_NAME"] = $2
N4["02_STATE_OR_PROV_CODE"] = $3
N4["03_POSTAL_CODE"] = $4
N4["04_COUNTRY_CODE"] = $5
N4["05_LOCATION_QUALIFIER"] = $6
N4["06_LOCATION_IDENTIFIER"] = $7
}
/^NM1\*/ {
# NM1 - Individual or Organizational Name
# MEMBER NAME
NM1["01_ENTITY_ID_CODE"] = $2
NM1["02_ENTITY_TYPE_QUALIFIER"] = $3
NM1["03_NAME_LAST_OR_ORG_NAME"] = $4
NM1["04_NAME_FIRST"] = $5
NM1["05_NAME_MIDDLE"] = $6
NM1["06_NAME_PREFIX"] = $7
NM1["07_NAME_SUFFIX"] = $8
NM1["08_ID_CODE_QUALIFIER"] = $9
NM1["09_ID_CODE"] = $10
}
/^PER\*/ {
# PER - Administrative Communications Contact
# MEMBER COMMUNICATIONS NUMBERS
if (length($4) > 0 && length($5) > 0) {
PER[$4 "_01_CONTACT_FUNCTION_CODE"] = $2
PER[$4 "_02_NAME"] = $3
PER[$4 "_ORDINAL"] = "1"
PER[$4 "_COMMUNICATION_NUMBER_QUALIFIER"] = $4
PER[$4 "_COMMUNICATION_NUMBER"] = $5
}
if (length($6) > 0 && length($7) > 0) {
PER[$6 "_01_CONTACT_FUNCTION_CODE"] = $2
PER[$6 "_02_NAME"] = $3
PER[$6 "_ORDINAL"] = "2"
PER[$6 "_COMMUNICATION_NUMBER_QUALIFIER"] = $6
PER[$6 "_COMMUNICATION_NUMBER"] = $7
}
if (length($8) > 0 && length($9) > 0) {
PER[$8 "_01_CONTACT_FUNCTION_CODE"] = $2
PER[$8 "_02_NAME"] = $3
PER[$8 "_ORDINAL"] = "2"
PER[$8 "_COMMUNICATION_NUMBER_QUALIFIER"] = $8
PER[$8 "_COMMUNICATION_NUMBER"] = $9
}
}
/^REF\*/ {
# REF - Reference Identification
# 38: Master Policy Number
# DX: Department/Agency Number
# 0F: Subscriber Number
# 1L: Group or Policy Number
# 17: Client Reporting Category
# 23: Client Number
# 3H: Case Number
# F6: Health Insurance Claim (HIC) Number
# ZZ: Mutually Defined
REF[$2 "_01_REFERENCE_IDENTIFICATION_QUALIFIER"] = $2
REF[$2 "_02_REFERENCE_IDENTIFICATION"] = $3
}
END {
# print final record
print_record()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment