Skip to content

Instantly share code, notes, and snippets.

@abishekmuthian
Created April 12, 2024 06:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abishekmuthian/041508473449005ebe4f0e0b2f7fbb5a to your computer and use it in GitHub Desktop.
Save abishekmuthian/041508473449005ebe4f0e0b2f7fbb5a to your computer and use it in GitHub Desktop.
Parsing a DEXA scan report using Amazon Comprehend Medical
package users
import (
"encoding/json"
"math"
"sort"
"strconv"
"strings"
"github.com/abishekmuthian/bonehealthtracker/src/lib/server/log"
)
// Direction is a direction attribute ("ap", "left" or "right", matched
// case-insensitively by Parse) attached to an anatomy entity.
type Direction struct {
	// text is the attribute text exactly as it appeared in the input.
	text string
	// beginOffset and endOffset are the attribute's BeginOffset/EndOffset
	// values; they are float64 because they are decoded from JSON numbers.
	beginOffset, endOffset float64
}
// BMD is a bone mineral density value extracted from a test-value attribute.
// Parse only records values that parse as a number below 1.5.
type BMD struct {
	// text is the parsed numeric value of the attribute text.
	text float64
	// beginOffset and endOffset are the attribute's BeginOffset/EndOffset
	// values; they are float64 because they are decoded from JSON numbers.
	beginOffset, endOffset float64
}
// tScore is a T-score value attached to a "T-score" entity. Parse only
// records values between -5.0 and 2.5 inclusive.
type tScore struct {
	// text is the parsed numeric value of the attribute text.
	text float64
	// beginOffset and endOffset are the attribute's BeginOffset/EndOffset
	// values; they are float64 because they are decoded from JSON numbers.
	beginOffset, endOffset float64
}
// zScore is a Z-score value attached to a "Z-score" entity. Parse only
// records values between -2.5 and 2.5 inclusive.
type zScore struct {
	// text is the parsed numeric value of the attribute text.
	text float64
	// beginOffset and endOffset are the attribute's BeginOffset/EndOffset
	// values; they are float64 because they are decoded from JSON numbers.
	beginOffset, endOffset float64
}
// Parse parses the entity JSON returned by AWS Comprehend Medical for a DEXA
// scan report and returns the organs (bone sites) found in it, after
// setOrganValues has matched directions, BMDs, T-scores and Z-scores to them.
//
// Every array-valued top-level key is scanned as a list of entities.
// Malformed input is skipped instead of trusted: invalid JSON (or a document
// that is not a JSON object) is logged and yields an empty result, and
// entities or attributes lacking a string "Text" or numeric
// "BeginOffset"/"EndOffset" are ignored rather than causing a panic.
//
// Extracted values are deliberately not logged, for privacy.
func Parse(dexaData []byte) []Organ {
	var report map[string]interface{}
	if err := json.Unmarshal(dexaData, &report); err != nil {
		log.Error(log.V{"Parser, Error unmarshaling dexaData": err})
		return []Organ{}
	}

	var found dexaFindings
	for key, value := range report {
		switch items := value.(type) {
		case string:
			// Scalar metadata carries no entities.
		case []interface{}:
			for _, item := range items {
				if entity, ok := item.(map[string]interface{}); ok {
					found.collect(entity)
				}
			}
		default:
			// JSON numbers decode as float64, so they end up here too.
			log.Info(log.V{"Parser, Don't know how to handle type": key})
		}
	}

	// Without any bone site there is nothing to attach values to.
	if len(found.organs) == 0 {
		return []Organ{}
	}
	return setOrganValues(found.organs, found.directions, found.tScores, found.zScores, found.bmds)
}

// dexaFindings accumulates everything extracted from one report before the
// values are matched to bone sites by offset.
type dexaFindings struct {
	organs     []Organ
	directions []Direction
	bmds       []BMD
	tScores    []tScore
	zScores    []zScore
}

// collect inspects a single entity and records every direction, BMD, bone
// site and score it carries. The category checks are independent of the
// attribute checks, so one entity may contribute to several lists.
func (f *dexaFindings) collect(entity map[string]interface{}) {
	if attr, ok := entity["Attribute"].(map[string]interface{}); ok {
		switch {
		case entity["Type"] == "ANATOMY" && attr["Type"] == "DIRECTION":
			f.addDirection(attr)
		case entity["Type"] == "TEST_TREATMENT_PROCEDURE" && attr["Type"] == "TEST_VALUE":
			f.addBMD(attr)
		}
	}
	if entity["Category"] == "ANATOMY" && entity["Type"] == "SYSTEM_ORGAN_SITE" {
		f.addBoneSite(entity)
	}
	if entity["Category"] == "TEST_TREATMENT_PROCEDURE" {
		f.collectProcedure(entity)
	}
}

// addDirection records attr when its text is "ap", "left" or "right"
// (case-insensitive). The stored text keeps its original casing.
func (f *dexaFindings) addDirection(attr map[string]interface{}) {
	text, ok := attr["Text"].(string)
	if !ok {
		return
	}
	switch strings.ToLower(text) {
	case "ap", "left", "right":
	default:
		return
	}
	begin, end, ok := dexaOffsets(attr)
	if !ok {
		return
	}
	f.directions = append(f.directions, Direction{text: text, beginOffset: begin, endOffset: end})
}

// addBoneSite records entity as an organ when its text names one of the
// tracked bone sites. "femoral" and "hip" are rewritten to "femur"; these
// replacements are case-sensitive, unlike the keyword match.
func (f *dexaFindings) addBoneSite(entity map[string]interface{}) {
	site, ok := entity["Text"].(string)
	if !ok {
		return
	}
	if !dexaContainsAny(strings.ToLower(site),
		"spine", "hip", "l1-l4", "l1 through l4", "femur", "neck", "forearm") {
		return
	}
	begin, end, ok := dexaOffsets(entity)
	if !ok {
		return
	}
	site = strings.Replace(site, "femoral", "femur", -1)
	site = strings.Replace(site, "hip", "femur", -1)
	f.organs = append(f.organs, Organ{Site: site, BeginOffset: begin, EndOffset: end})
}

// collectProcedure handles an entity of category TEST_TREATMENT_PROCEDURE:
// a "femur total" entity is itself a bone site whose attributes are BMDs,
// "bmd"/"bone mineral density" entities carry BMD test values, and
// "T-score"/"Z-score" entities (exact match) carry the respective scores.
func (f *dexaFindings) collectProcedure(entity map[string]interface{}) {
	name, _ := entity["Text"].(string)
	lowered := strings.ToLower(name)
	attrs := dexaAttributes(entity)

	if strings.Contains(lowered, "femur total") {
		if begin, end, ok := dexaOffsets(entity); ok {
			f.organs = append(f.organs, Organ{Site: name, BeginOffset: begin, EndOffset: end})
		}
		for _, attr := range attrs {
			f.addBMD(attr)
		}
	}

	if strings.Contains(lowered, "bmd") || strings.Contains(lowered, "bone mineral density") {
		for _, attr := range attrs {
			if attr["Type"] == "TEST_VALUE" {
				f.addBMD(attr)
			}
		}
	}

	switch name {
	case "T-score":
		inRange := func(v float64) bool { return v >= -5.0 && v <= 2.5 }
		for _, attr := range attrs {
			if value, begin, end, ok := dexaReading(attr, inRange); ok {
				f.tScores = append(f.tScores, tScore{text: value, beginOffset: begin, endOffset: end})
			}
		}
	case "Z-score":
		inRange := func(v float64) bool { return v >= -2.5 && v <= 2.5 }
		for _, attr := range attrs {
			if value, begin, end, ok := dexaReading(attr, inRange); ok {
				f.zScores = append(f.zScores, zScore{text: value, beginOffset: begin, endOffset: end})
			}
		}
	}
}

// addBMD records attr as a BMD when its text is a number below 1.5.
func (f *dexaFindings) addBMD(attr map[string]interface{}) {
	belowLimit := func(v float64) bool { return v < 1.5 }
	if value, begin, end, ok := dexaReading(attr, belowLimit); ok {
		f.bmds = append(f.bmds, BMD{text: value, beginOffset: begin, endOffset: end})
	}
}

// dexaReading extracts a numeric measurement and its offsets from attr.
// Texts that contain a qualitative word or a unit are skipped silently; texts
// that do not parse as a float, or whose value accept rejects, are logged and
// skipped. ok is false whenever no usable reading was found.
func dexaReading(attr map[string]interface{}, accept func(float64) bool) (value, begin, end float64, ok bool) {
	raw, isText := attr["Text"].(string)
	if !isText {
		return 0, 0, 0, false
	}
	text := strings.ToLower(raw)
	if dexaContainsAny(text, "low", "high", "normal", "g/cm2", "gm/cm2", "gm per cm2", "g/cm²") {
		return 0, 0, 0, false
	}
	parsed, err := strconv.ParseFloat(text, 64)
	if err != nil || !accept(parsed) {
		// err is nil here when the number parsed but was out of range.
		log.Error(log.V{"Parser, Error converting text to float": err})
		return 0, 0, 0, false
	}
	first, last, hasOffsets := dexaOffsets(attr)
	if !hasOffsets {
		return 0, 0, 0, false
	}
	return parsed, first, last, true
}

// dexaAttributes returns the object-valued items of entity["Attributes"], or
// an empty slice when the key is missing or not an array.
func dexaAttributes(entity map[string]interface{}) []map[string]interface{} {
	raw, _ := entity["Attributes"].([]interface{})
	attrs := make([]map[string]interface{}, 0, len(raw))
	for _, item := range raw {
		if attr, ok := item.(map[string]interface{}); ok {
			attrs = append(attrs, attr)
		}
	}
	return attrs
}

// dexaOffsets returns obj's BeginOffset and EndOffset; the bool is false when
// either is missing or not a JSON number.
func dexaOffsets(obj map[string]interface{}) (float64, float64, bool) {
	begin, hasBegin := obj["BeginOffset"].(float64)
	end, hasEnd := obj["EndOffset"].(float64)
	return begin, end, hasBegin && hasEnd
}

// dexaContainsAny reports whether text contains at least one of words.
func dexaContainsAny(text string, words ...string) bool {
	for _, word := range words {
		if strings.Contains(text, word) {
			return true
		}
	}
	return false
}
// setOrganValues matches the Directions, T-scores, Z-scores and BMDs to the
// organs (bone sites) using their offsets and returns the resulting organs
// with sequential Ids and zeroed offsets (offsets are removed for privacy).
//
// Steps:
//  1. Each direction is attached to a nearby organ.
//  2. Organs whose offsets are adjacent (EndOffset+1 == next BeginOffset) are
//     merged into one site. A non-adjacent organ that still has no direction
//     and is not a forearm or L1-L4 site either has "left"/"right"/"total"
//     stripped from its (lower-cased) name, or is dropped.
//  3. Duplicates (same Direction and Site) are removed, keeping the first.
//  4. T-scores, Z-scores and BMDs are assigned positionally when their count
//     equals the organ count, otherwise to a nearby organ by offset.
//
// organs is modified in place during steps 1 and 2. findClosestElementIndex
// uses a binary search, so organs are assumed to be ordered by offset.
// When organs is empty, or every organ is dropped in step 2, an empty non-nil
// slice is returned instead of indexing into empty offset lists.
func setOrganValues(organs []Organ, directions []Direction, tScores []tScore, zScores []zScore, bmds []BMD) []Organ {
	// remove is the Direction value that marks an organ for deletion.
	const remove = "Remove"

	if len(organs) == 0 {
		return []Organ{}
	}

	begins, ends := organOffsetLists(organs)
	for _, direction := range directions {
		// NOTE(review): directions (and BMDs below) compare signed distances
		// while T-/Z-scores compare absolute ones; kept as found — confirm
		// whether the difference is intentional.
		idx := nearestOrganIndex(begins, ends, direction.beginOffset, direction.endOffset, false)
		organs[idx].Direction = direction.text
	}

	for i, organ := range organs {
		if i != len(organs)-1 && organ.EndOffset+1 == organs[i+1].BeginOffset {
			// Two adjacent sites form one name, e.g. "femur" + "neck". The
			// merged entry lives on whichever side already has a direction.
			next := &organs[i+1]
			if organ.Direction != "" {
				organs[i].Site = organ.Site + " " + next.Site
				if !strings.Contains(strings.ToLower(next.Site), "forearm") {
					organs[i].EndOffset = next.EndOffset
					next.Direction = remove
				}
			} else {
				next.Site = organ.Site + " " + next.Site
				if !strings.Contains(strings.ToLower(organs[i].Site), "forearm") {
					next.BeginOffset = organ.BeginOffset
					organs[i].Direction = remove
				}
			}
			continue
		}

		site := strings.ToLower(organ.Site)
		if organ.Direction != "" || siteContainsAny(site, "forearm", "l1 through l4", "l1-l4") {
			continue
		}
		if !siteContainsAny(site, "left", "right", "total") {
			organs[i].Direction = remove
			continue
		}
		// The direction is embedded in the site name: move it into Direction.
		// Each check re-reads the site because the previous step rewrites it.
		if strings.Contains(site, "left") {
			organs[i].Site = removeSiteWord(organs[i].Site, "left")
			organs[i].Direction = "Left"
		}
		if strings.Contains(strings.ToLower(organs[i].Site), "right") {
			organs[i].Site = removeSiteWord(organs[i].Site, "right")
			organs[i].Direction = "Right"
		}
		if strings.Contains(strings.ToLower(organs[i].Site), "total") {
			organs[i].Site = removeSiteWord(organs[i].Site, "total")
		}
	}

	// Drop organs marked for removal and duplicate Direction+Site pairs.
	seen := make(map[string]struct{}, len(organs))
	kept := make([]Organ, 0, len(organs))
	for _, organ := range organs {
		if organ.Direction == remove {
			continue
		}
		key := organ.Direction + organ.Site
		if _, duplicate := seen[key]; duplicate {
			continue
		}
		seen[key] = struct{}{}
		kept = append(kept, organ)
	}
	organs = kept

	// Everything was filtered out: there is no organ left to receive values.
	if len(organs) == 0 {
		return []Organ{}
	}

	begins, ends = organOffsetLists(organs)

	// Find T-scores for the organs.
	if len(tScores) != len(organs) {
		for _, score := range tScores {
			idx := nearestOrganIndex(begins, ends, score.beginOffset, score.endOffset, true)
			organs[idx].TScore = score.text
		}
	} else {
		for i, score := range tScores {
			organs[i].TScore = score.text
		}
	}

	// Find Z-scores for the organs.
	if len(zScores) != len(organs) {
		for _, score := range zScores {
			idx := nearestOrganIndex(begins, ends, score.beginOffset, score.endOffset, true)
			organs[idx].ZScore = score.text
		}
	} else {
		for i, score := range zScores {
			organs[i].ZScore = score.text
		}
	}

	// Find the BMD for the organs.
	if len(bmds) != len(organs) {
		for _, bmd := range bmds {
			idx := nearestOrganIndex(begins, ends, bmd.beginOffset, bmd.endOffset, false)
			organs[idx].Bmd = bmd.text
		}
	} else {
		for i, bmd := range bmds {
			organs[i].Bmd = bmd.text
		}
	}

	// Add Id and remove offsets for privacy. organs is the freshly built
	// kept slice here, so the caller's slice is not touched by this step.
	for i := range organs {
		organs[i].Id = i
		organs[i].BeginOffset = 0
		organs[i].EndOffset = 0
	}
	return organs
}

// organOffsetLists returns the BeginOffset and EndOffset of every organ, in
// organ order.
func organOffsetLists(organs []Organ) (begins, ends []float64) {
	begins = make([]float64, 0, len(organs))
	ends = make([]float64, 0, len(organs))
	for _, organ := range organs {
		begins = append(begins, organ.BeginOffset)
		ends = append(ends, organ.EndOffset)
	}
	return begins, ends
}

// nearestOrganIndex picks the organ for a value spanning
// [beginOffset, endOffset]. It looks up the organ whose BeginOffset is
// closest to the value's end and the organ whose EndOffset is closest to the
// value's begin, then returns the latter when its distance is smaller.
// Distances are compared as absolute values only when absolute is true.
// begins and ends must be non-empty.
func nearestOrganIndex(begins, ends []float64, beginOffset, endOffset float64, absolute bool) int {
	byBegin := findClosestElementIndex(begins, 1, endOffset)
	byEnd := findClosestElementIndex(ends, 1, beginOffset)
	toBegin := endOffset - begins[byBegin]
	toEnd := ends[byEnd] - beginOffset
	if absolute {
		toBegin, toEnd = math.Abs(toBegin), math.Abs(toEnd)
	}
	if toBegin > toEnd {
		return byEnd
	}
	return byBegin
}

// siteContainsAny reports whether site contains at least one of words.
func siteContainsAny(site string, words ...string) bool {
	for _, word := range words {
		if strings.Contains(site, word) {
			return true
		}
	}
	return false
}

// removeSiteWord lower-cases site and removes word together with one
// neighbouring space, trying " word", then "word ", then " word ".
func removeSiteWord(site, word string) string {
	site = strings.Replace(strings.ToLower(site), " "+word, "", -1)
	site = strings.Replace(site, word+" ", "", -1)
	return strings.Replace(site, " "+word+" ", "", -1)
}
// findClosestElementIndex returns the start index of the window of k
// consecutive elements of arr that lies closest to x; with k == 1 that is
// the index of the organ (bone site) offset nearest to the entity offset x.
// arr is binary-searched, so it is expected to be in ascending order.
// When len(arr) <= k the result is 0.
func findClosestElementIndex(arr []float64, k int, x float64) int {
	windowStarts := len(arr) - k
	// A window starting at start is at least as good as the one starting at
	// start+1 when x is no farther from its first element than from the
	// element just past its end.
	leftIsCloser := func(start int) bool {
		gapBefore := x - arr[start]
		gapAfter := arr[start+k] - x
		return gapBefore <= gapAfter
	}
	return sort.Search(windowStarts, leftIsCloser)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment