Last active
December 20, 2015 12:59
-
-
Save dynajoe/6135431 to your computer and use it in GitHub Desktop.
detect name or birth mismatch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs');
var http = require('http');
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Host, port and base path that every Solr URL is built from (no scheme).
// An alternative host that was kept commented out in earlier revisions:
//   'illum-qa-india.softek.local:8080/solr/'
var rootUrl = 'illum-index-prod:8080/solr/';

// Model name -> number of shards queried for that model. Shards are numbered
// from 0, so {'RadExam': 4} covers RadExam_Models_00 .. RadExam_Models_03.
// A smaller layout that was kept commented out in earlier revisions:
//   {'RadExam': 2, 'PathExam': 2, 'LabResult': 2, 'Patient': 1}
var modelNames = {'RadExam': 4, 'PathExam': 1, 'LabResult': 8, 'Patient': 1 };

// Accumulates the value of the Solr `shards` request parameter; it starts
// empty and is filled in by the shard-enumeration code that follows.
var shards = '';

// Number of MRN groups requested per page, and number of pages requested.
var pageSize = 2000;
var pages = 2;
/**
 * Lists the Solr core name of every shard described by `models`.
 *
 * Core names have the form `<Model>_Models_<NN>`, where NN is the zero-based
 * shard index left-padded to two digits.
 *
 * @param {Object} models - map of model name to shard count.
 * @returns {string[]} core names, in the enumeration order of `models`.
 */
var listShardNames = function (models) {
  var names = [];
  for (var modelName in models) {
    // Skip anything inherited through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(models, modelName)) {
      continue;
    }
    var shardCount = models[modelName];
    for (var n = 0; n < shardCount; n++) {
      names.push(modelName + '_Models_' + ('00' + n).slice(-2));
    }
  }
  return names;
};

var shardNames = listShardNames(modelNames);

// Every shard URL is followed by a comma (including the last one), exactly as
// the request parameter was built before.
for (var shardIndex = 0; shardIndex < shardNames.length; shardIndex++) {
  shards += rootUrl + shardNames[shardIndex] + ',';
}

// formQuery sends its request to this core. It used to be whatever value the
// enumeration loop happened to leave behind, i.e. the last shard listed; that
// choice is now explicit.
var queryShard = shardNames[shardNames.length - 1];
/**
 * Builds the Solr select URL for one page of MRN-grouped results.
 *
 * Reads the file-level `rootUrl`, `queryShard` and `shards` values.
 *
 * @param {number} page - zero-based page index.
 * @param {number} pageSize - number of rows (groups) requested per page.
 * @returns {string} the complete request URL.
 */
var formQuery = function (page, pageSize) {
  var offset = page * pageSize;

  // Endpoint of the core that receives the request.
  var endpoint = 'http://' + rootUrl + queryShard + '/select/';

  // Paging, response format and the list of shards to search.
  var paging = '?q=*:*&rows=' + pageSize +
    '&start=' + offset +
    '&wt=json&' +
    'shards=' + shards;

  // Group by MRN and return the fields the comparison needs.
  var grouping = '&group=true&group.field=mrn&fl=id,key,mrn,date,birth,name,gender,patientfirstname,patientlastname,[shard]&group.limit=999&sort=key desc';

  return endpoint + paging + grouping;
};
// Document fields whose values are compared across the records of one MRN.
// The CSV output emits one detail column per entry, in this order.
var docGroupTypes = [
  'patientfirstname',
  'patientlastname',
  'birth',
  'gender'
];
/**
 * Reads one field of a document as a single value.
 *
 * A field may hold either an array of values or a bare scalar. For an array
 * the first element is returned; a scalar is returned unchanged. Indexing a
 * scalar string with [0] (as was done before) would return only its first
 * character, so the two shapes are told apart explicitly.
 *
 * @param {Object} doc - the document to read from.
 * @param {string} key - the field name.
 * @returns {*} the field value, or null when the field is missing, falsy,
 *   an empty array, or an array whose first element is falsy.
 */
var getDocValue = function (doc, key) {
  var raw = doc[key];
  if (!raw) {
    return null;
  }
  if (Array.isArray(raw)) {
    return raw.length > 0 && raw[0] ? raw[0] : null;
  }
  return raw;
};
// MRNs that have already been examined, shared by all pages so that a patient
// returned on more than one page is only processed once. The object is created
// without a prototype so that an MRN which happens to equal an inherited
// property name (for example "constructor") is not mistaken for a visited one.
var visitedMrns = Object.create(null);
/**
 * Buckets the documents of one MRN group by field and by value.
 *
 * @param {Object} doclist - the group's `doclist` ({ numFound, docs }).
 * @param {*} fallbackMrn - MRN to report when no document supplies one.
 * @returns {Object} { count, groups, patient } where `count` is
 *   `doclist.numFound`, `groups[field][value]` is the list of records that
 *   carry that value, and `patient` is a copy of the document coming from a
 *   "Patient" shard (when there is one) with `mrn` set to a single value.
 */
var collectDocGroups = function (doclist, fallbackMrn) {
  var docs = doclist.docs || [];
  // numFound already counts every document; it must not be incremented again
  // while iterating, which previously doubled the reported count.
  var docGroups = { count: doclist.numFound, groups: {}, patient: {} };
  var mrn = fallbackMrn;
  var patientDoc = null;

  for (var d = 0; d < docs.length; d++) {
    var doc = docs[d];
    mrn = getDocValue(doc, 'mrn');

    for (var t = 0; t < docGroupTypes.length; t++) {
      var type = docGroupTypes[t];
      var value = getDocValue(doc, type);
      if (!value) {
        continue;
      }
      // Own-property checks keep a value such as "constructor" from
      // resolving to something inherited from Object.prototype.
      var byValue = Object.prototype.hasOwnProperty.call(docGroups.groups, type)
        ? docGroups.groups[type] : {};
      var records = Object.prototype.hasOwnProperty.call(byValue, value)
        ? byValue[value] : [];
      records.push({ mrn: mrn, date: getDocValue(doc, 'date'), value: value, key: doc.key, shard: doc['[shard]'] });
      byValue[value] = records;
      docGroups.groups[type] = byValue;
    }

    var shard = doc['[shard]'];
    if (typeof shard === 'string' && shard.indexOf('Patient') >= 0) {
      patientDoc = doc;
    }
  }

  // Copy the patient document instead of adopting it, so that writing the
  // normalised mrn below does not modify the caller's response data.
  if (patientDoc) {
    for (var field in patientDoc) {
      if (Object.prototype.hasOwnProperty.call(patientDoc, field)) {
        docGroups.patient[field] = patientDoc[field];
      }
    }
  }
  docGroups.patient.mrn = mrn;
  return docGroups;
};

/**
 * Tells whether any compared field holds more than one distinct value.
 *
 * @param {Object} groups - `groups` as built by collectDocGroups.
 * @returns {boolean} true when at least one field has two or more values.
 */
var hasConflictingValues = function (groups) {
  var types = Object.keys(groups);
  for (var i = 0; i < types.length; i++) {
    if (Object.keys(groups[types[i]]).length > 1) {
      return true;
    }
  }
  return false;
};

/**
 * Processes one page of a Solr response grouped by MRN.
 *
 * Groups whose MRN was already seen (tracked in `visitedMrns`) and groups
 * with a single document are skipped. Every remaining group in which a
 * compared field (see `docGroupTypes`) has more than one distinct value is
 * collected, and the collected list is handed to `analyze`.
 *
 * @param {Object} data - parsed response; must contain `grouped.mrn.groups`.
 * @returns {Object} { suspicous, total }: number of flagged groups and number
 *   of groups on the page. The key `suspicous` is misspelled but is what the
 *   caller reads, so it is kept as is.
 * @throws {Error} when the response has no `grouped.mrn.groups`.
 */
var parseData = function (data) {
  if (!data || !data.grouped || !data.grouped.mrn || !data.grouped.mrn.groups) {
    throw new Error('Unexpected response: missing grouped.mrn.groups');
  }
  var mrnGroups = data.grouped.mrn.groups;
  var suspiciousMrns = [];

  for (var g = 0; g < mrnGroups.length; g++) {
    var group = mrnGroups[g];
    if (visitedMrns[group.groupValue]) {
      continue;
    }
    visitedMrns[group.groupValue] = true;
    if (group.doclist.numFound <= 1) {
      continue;
    }
    var docGroups = collectDocGroups(group.doclist, group.groupValue);
    if (hasConflictingValues(docGroups.groups)) {
      suspiciousMrns.push(docGroups);
    }
  }

  analyze(suspiciousMrns);
  return { suspicous: suspiciousMrns.length, total: mrnGroups.length };
};
/**
 * Extracts the part of a shard URL that follows "/solr/".
 *
 * @param {*} shardUrl - the document's `[shard]` value.
 * @returns {string} the text after the last "/solr/"; the whole text when
 *   that marker is absent (previously the first five characters were silently
 *   dropped in that case); 'unknown' when no shard was recorded.
 */
var shardLabel = function (shardUrl) {
  if (shardUrl === null || shardUrl === undefined) {
    return 'unknown';
  }
  var text = String(shardUrl);
  var marker = text.lastIndexOf('/solr');
  // '/solr' is five characters long; one more skips the following slash.
  return marker === -1 ? text : text.substring(marker + 6);
};

/**
 * Summarises each suspicious patient and passes the summaries on to
 * `scoreSummaries`.
 *
 * For every field with more than one distinct value, the summary lists each
 * value with the number of records carrying it, the date and value of the
 * first such record, and a per-shard record count.
 *
 * @param {Object[]} suspiciousPatients - entries of the form
 *   { patient, count, groups } where `groups[field][value]` is an array of
 *   records ({ date, value, shard, ... }).
 * @returns {undefined}
 */
var analyze = function (suspiciousPatients) {
  var summaries = [];

  for (var i = 0; i < suspiciousPatients.length; i++) {
    var entry = suspiciousPatients[i];
    var summary = { patient: entry.patient, docs: entry.count, changes: {} };
    var types = Object.keys(entry.groups);

    for (var t = 0; t < types.length; t++) {
      var byValue = entry.groups[types[t]];
      var values = Object.keys(byValue);
      // A field with a single value did not change; leave it out.
      if (values.length <= 1) {
        continue;
      }

      var changedValues = [];
      for (var v = 0; v < values.length; v++) {
        var records = byValue[values[v]];
        var shardCounts = {};
        for (var r = 0; r < records.length; r++) {
          var label = shardLabel(records[r].shard);
          shardCounts[label] = (shardCounts[label] || 0) + 1;
        }
        changedValues.push({ count: records.length, date: records[0].date, value: records[0].value, shards: shardCounts });
      }
      summary.changes[types[t]] = changedValues;
    }

    summaries.push(summary);
  }

  scoreSummaries(summaries);
};
/**
 * Scores every summary in place via `scoreSummary`, then hands the whole
 * list to `writeSummaries`.
 *
 * @param {Object[]} summaries - summaries to score and write.
 * @returns {undefined}
 */
var scoreSummaries = function (summaries) {
  var position;
  for (position in summaries) {
    var current = summaries[position];
    scoreSummary(current);
  }

  writeSummaries(summaries);
};
/**
 * Wraps text in double quotes for CSV, doubling any embedded double quote.
 *
 * @param {*} text - the cell content.
 * @returns {string} the quoted cell.
 */
var quoteCsvField = function (text) {
  return '"' + String(text).replace(/"/g, '""') + '"';
};

/**
 * Formats one summary as a CSV row.
 *
 * Columns: mrn, score, one quoted column per entry of `docGroupTypes`
 * listing "value(count)" for each distinct value, and a final quoted column
 * naming the fields that changed.
 *
 * @param {Object} summary - { patient: { mrn }, score, changes }.
 * @returns {string} the row, without a trailing newline.
 */
var createCsv = function (summary) {
  var mrn = summary.patient.mrn;
  // The MRN is written bare, as before, unless it contains a character that
  // would break the row; only then is it quoted.
  if (mrn !== null && mrn !== undefined && /[",\r\n]/.test(String(mrn))) {
    mrn = quoteCsvField(mrn);
  }

  var columns = [mrn, summary.score];
  var differences = [];

  for (var i = 0; i < docGroupTypes.length; i++) {
    var type = docGroupTypes[i];
    var changes = summary.changes[type] || [];
    var details = [];
    for (var x = 0; x < changes.length; x++) {
      details.push(changes[x].value + '(' + changes[x].count + ')');
    }
    if (changes.length > 0) {
      differences.push(type);
    }
    // Values come from patient data, so embedded quotes must be escaped.
    columns.push(quoteCsvField(details.join(', ')));
  }

  columns.push(quoteCsvField(differences.join(', ')));
  return columns.join(',');
};
/**
 * Appends the given lines to out.csv in the current working directory, each
 * terminated by a newline.
 *
 * An empty batch is ignored; previously it appended a lone newline, leaving
 * blank rows in the file.
 *
 * @param {string[]} lines - rows to append.
 * @returns {undefined}
 */
var writeLines = function (lines) {
  if (!lines || lines.length === 0) {
    return;
  }
  fs.appendFileSync('out.csv', lines.join('\n') + '\n');
};
/**
 * Converts summaries to CSV rows with `createCsv` and writes them through
 * `writeLines` in batches of 20 rows.
 *
 * The batch test used to be `lines % 20`, which applies the remainder
 * operator to the array itself rather than to its length, so no batch was
 * ever flushed inside the loop. The loop variable was also assigned without
 * a declaration, creating a global (an error in strict mode).
 *
 * @param {Object[]} summaries - scored summaries.
 * @returns {undefined}
 */
var writeSummaries = function (summaries) {
  var batchSize = 20;
  var lines = [];

  for (var s = 0; s < summaries.length; s++) {
    lines.push(createCsv(summaries[s]));
    if (lines.length === batchSize) {
      writeLines(lines);
      lines = [];
    }
  }

  // Write whatever is left over; skip the call when nothing remains.
  if (lines.length > 0) {
    writeLines(lines);
  }
};
/**
 * Scores disagreement between the gender values recorded for one patient.
 *
 * Compares the record counts of the two most frequent values. When the
 * runner-up has more than 30% as many records as the most frequent value the
 * score is 1, otherwise 0.
 *
 * The runner-up used to be recorded only at the moment a new maximum was
 * found, so it was missed whenever the most frequent value came first
 * (counts [10, 5] scored 0). Both ranks are now tracked independently.
 *
 * @param {Object[]} changes - entries with a numeric `count`.
 * @returns {number} 1 or 0.
 */
var scoreGenderChanges = function (changes) {
  var list = changes || [];
  var highest = null;
  var runnerUp = null;

  for (var g = 0; g < list.length; g++) {
    var count = list[g].count;
    if (highest === null || count >= highest) {
      runnerUp = highest;
      highest = count;
    } else if (runnerUp === null || count > runnerUp) {
      runnerUp = count;
    }
  }

  if (runnerUp !== null && highest > 0 && runnerUp / highest > 0.3) {
    return 1;
  }
  return 0;
};
/**
 * Scores disagreement between the birth values recorded for one patient.
 *
 * Each value is split into its first four characters and the remainder
 * (presumably year and month/day - the exact format is not visible here).
 * More than two distinct leading parts add 0.2; more than two distinct
 * remainders add 0.5.
 *
 * The thresholds used to compare the key arrays themselves with 2 instead of
 * their lengths, which made the outcome depend on how the array coerces to a
 * number (a single value such as '1980' counted as "more than 2").
 *
 * @param {Object[]} changes - entries with a `value`.
 * @returns {number} 0, 0.2, 0.5 or their sum.
 */
var scoreBirthChanges = function (changes) {
  var list = changes || [];
  var years = {};
  var days = {};

  for (var g = 0; g < list.length; g++) {
    // Coerce so that a non-string value does not fail on substring().
    var birth = String(list[g].value);
    var year = birth.substring(0, 4);
    var day = birth.substring(4);
    years[year] = (years[year] || 0) + 1;
    days[day] = (days[day] || 0) + 1;
  }

  var score = 0;
  if (Object.keys(years).length > 2) {
    score += 0.2;
  }
  if (Object.keys(days).length > 2) {
    score += 0.5;
  }
  return score;
};
/**
 * Computes a score for one summary and stores it in `summary.score`.
 *
 * The score is the sum of:
 *  - `scoreWordDistance` for every changed field,
 *  - `scoreGenderChanges` when gender changed,
 *  - `scoreBirthChanges` when birth changed,
 *  - 0.5 when both first name and last name changed.
 *
 * The birth branch used to pass the gender changes to `scoreBirthChanges`;
 * it now passes the birth changes.
 *
 * @param {Object} summary - { changes }; receives a numeric `score`.
 * @returns {undefined}
 */
var scoreSummary = function (summary) {
  var changes = summary.changes;
  var types = Object.keys(changes);
  var score = 0;
  var t;

  // The word distances are summed first and the field-specific scores are
  // added afterwards, keeping the original order of the additions.
  for (t = 0; t < types.length; t++) {
    score += scoreWordDistance(changes[types[t]]);
  }

  var nameFieldsChanged = 0;
  for (t = 0; t < types.length; t++) {
    var type = types[t];
    if (type === 'patientfirstname' || type === 'patientlastname') {
      nameFieldsChanged++;
    } else if (type === 'gender') {
      score += scoreGenderChanges(changes.gender);
    } else if (type === 'birth') {
      score += scoreBirthChanges(changes.birth);
    }
  }

  // If both first name and last name changed, increase the score.
  if (nameFieldsChanged === 2) {
    score += 0.5;
  }

  summary.score = score;
};
/**
 * Averages `distance` over every ordered pair of distinct entries.
 *
 * Each unordered pair is visited in both directions, so a `distance` that is
 * not symmetric contributes both of its results.
 *
 * With fewer than two entries there are no pairs; the result is then 0
 * instead of the NaN that dividing 0 by 0 used to produce.
 *
 * @param {Object[]} changes - entries with a `value`.
 * @returns {number} the mean pairwise distance, or 0 when there are no pairs.
 */
var scoreWordDistance = function (changes) {
  var list = changes || [];
  var totalDistance = 0;
  var pairCount = 0;

  for (var i = 0; i < list.length; i++) {
    for (var j = 0; j < list.length; j++) {
      if (list[i] === list[j]) {
        continue;
      }
      // distance() works on strings; coerce in case a value is numeric.
      totalDistance += distance(String(list[i].value), String(list[j].value));
      pairCount++;
    }
  }

  return pairCount === 0 ? 0 : totalDistance / pairCount;
};
/**
 * Edit distance between two strings (substitution, insertion and deletion
 * each cost 1), divided by the length of `a`.
 *
 * When either string is empty the length of the other one is returned as is,
 * without the division.
 *
 * @param {string} a - first string; its length is the divisor.
 * @param {string} b - second string.
 * @returns {number} the edit distance relative to `a.length`.
 */
var distance = function (a, b) {
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }

  // Only the previous row of the table is needed to compute the next one.
  var previousRow = [];
  var col;
  for (col = 0; col <= a.length; col++) {
    previousRow[col] = col;
  }

  for (var row = 1; row <= b.length; row++) {
    var currentRow = [row];
    for (col = 1; col <= a.length; col++) {
      if (b.charAt(row - 1) == a.charAt(col - 1)) {
        currentRow[col] = previousRow[col - 1];
      } else {
        var substitution = previousRow[col - 1] + 1;
        var insertion = currentRow[col - 1] + 1;
        var deletion = previousRow[col] + 1;
        currentRow[col] = Math.min(substitution, Math.min(insertion, deletion));
      }
    }
    previousRow = currentRow;
  }

  return previousRow[a.length] / a.length;
};
// ---------------------------------------------------------------------------
// Driver: request every page, feed each response to parseData and print a
// final tally once all pages have been handled.
// ---------------------------------------------------------------------------

// Number of page requests that have not completed (or failed) yet.
var waiting = 0;
// Running total of `suspicous` over all parsed pages.
var suspiciousCount = 0;
// Running total of `total` (MRN groups returned) over all parsed pages.
var totalDocs = 0;

/**
 * Marks one page as handled and prints the tally after the last one.
 *
 * @param {number} page - index of the page that finished.
 * @returns {undefined}
 */
var finishPage = function (page) {
  waiting--;
  console.log('Finished page ' + page);
  if (waiting !== 0) {
    return;
  }
  var visitedCount = Object.keys(visitedMrns).length;
  // Percentage with one decimal place. The earlier formula printed the raw
  // ratio next to a "%" sign, and produced NaN when nothing was visited.
  var percent = visitedCount > 0
    ? Math.round(suspiciousCount / visitedCount * 1000) / 10
    : 0;
  console.log('Found ' + suspiciousCount + ' of ' + visitedCount + ' patients. ~' + percent + '%');
};

for (var i = 0; i < pages; i++) {
  (function (page) {
    var query = formQuery(page, pageSize);
    console.log('Page: ' + page + ' Query: ' + query);
    console.log('');
    waiting++;

    // Guards against counting the same page twice if both a response and an
    // error are reported for it.
    var settled = false;
    var settle = function () {
      if (!settled) {
        settled = true;
        finishPage(page);
      }
    };

    http.get(query, function (res) {
      if (res.statusCode !== 200) {
        console.error('Page ' + page + ' failed with HTTP status ' + res.statusCode);
        res.resume(); // discard the body so the socket is released
        settle();
        return;
      }

      var result = '';
      // Decode as UTF-8 at the stream level so that a character split across
      // two chunks is not corrupted by converting each chunk on its own.
      res.setEncoding('utf8');
      res.on('data', function (chunk) {
        result += chunk;
      });
      res.on('end', function () {
        try {
          var r = parseData(JSON.parse(result));
          suspiciousCount = suspiciousCount + r.suspicous;
          totalDocs = totalDocs + r.total;
        } catch (err) {
          // Report the failure but still count the page as handled, otherwise
          // the final tally would never be printed.
          console.error('Page ' + page + ' could not be processed: ' + err.message);
        }
        settle();
      });
    }).on('error', function (err) {
      console.error('Page ' + page + ' request failed: ' + err.message);
      settle();
    });
  })(i);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment