Skip to content

Instantly share code, notes, and snippets.

@dynajoe
Last active December 20, 2015 12:59
Show Gist options
  • Save dynajoe/6135431 to your computer and use it in GitHub Desktop.
Save dynajoe/6135431 to your computer and use it in GitHub Desktop.
detect name or birth mismatch
var http = require('http');
// Solr endpoint (host:port/path, no scheme) under which every shard core lives.
// Alternate configuration that used to sit here as commented-out code:
//   rootUrl    'illum-qa-india.softek.local:8080/solr/'
//   modelNames {'RadExam': 2, 'PathExam': 2, 'LabResult': 2, 'Patient': 1 }
var rootUrl = 'illum-index-prod:8080/solr/';
// Number of cores per model. Cores are named <Model>_Models_<two-digit index>.
// (A second, immediately-overwritten declaration of this map was removed.)
var modelNames = { 'RadExam': 4, 'PathExam': 1, 'LabResult': 8, 'Patient': 1 };
// `rows` value sent with each request, and the number of pages the driver loop requests.
var pageSize = 2000;
var pages = 2;
// Every shard URL followed by a comma (so the list ends with a trailing comma);
// sent as the `shards` query parameter.
var shards = '';
// After the loop this holds the LAST core name generated; formQuery addresses its request to it.
var queryShard;
for (var m in modelNames) {
  var model = modelNames[m];
  for (var i = 0; i < model; i++) {
    // Zero-pad the core index to two digits: 0 -> '00', 7 -> '07'.
    queryShard = m + '_Models_' + ('00' + i).slice(-2);
    shards += rootUrl + queryShard + ',';
  }
}
/**
 * Builds the Solr select URL for one page of MRN-grouped results.
 *
 * Reads the module-level `rootUrl`, `queryShard` and `shards` values.
 *
 * @param {number} page - Zero-based page index.
 * @param {number} pageSize - Value sent as `rows`; also used to derive `start`.
 * @returns {string} The complete request URL, including the `http://` scheme.
 */
var formQuery = function (page, pageSize) {
  var offset = page * pageSize;
  var url = 'http://' + rootUrl + queryShard + '/select/';
  url += '?q=*:*&rows=' + pageSize;
  url += '&start=' + offset;
  url += '&wt=json&';
  url += 'shards=' + shards;
  // Group by MRN and return up to 999 documents per group, newest key first.
  url += '&group=true&group.field=mrn&fl=id,key,mrn,date,birth,name,gender,patientfirstname,patientlastname,[shard]&group.limit=999&sort=key desc';
  return url;
};
// Document fields whose values are compared across all records sharing an MRN (see parseData).
// createCsv also emits one detail column per entry, in this order.
var docGroupTypes = [ 'patientfirstname', 'patientlastname', 'birth', 'gender' ];
/**
 * Reads a field from a document, unwrapping array-valued fields to their first element.
 *
 * Only real arrays are unwrapped. Indexing a plain string with [0] would return just its
 * first character, so scalar values are returned whole.
 *
 * @param {Object} doc - Document to read from.
 * @param {string} key - Field name.
 * @returns {*} null when the field is missing or falsy; for an array, its first element
 *   (or the array itself when that element is falsy); otherwise the value unchanged.
 */
var getDocValue = function (doc, key) {
  var raw = doc[key];
  if (!raw) {
    return null;
  }
  if (Array.isArray(raw)) {
    return raw[0] || raw;
  }
  return raw;
};
// MRN group values already handled by parseData, so an MRN that shows up on more than one
// page is processed once. The driver uses the number of keys as the patient total.
var visitedMrns = {};
/**
 * Scans one page of MRN-grouped results for MRNs whose documents disagree on any field in
 * `docGroupTypes`, then hands the suspicious ones to `analyze`.
 *
 * Reads `data.grouped.mrn.groups[*].groupValue` and `.doclist.{numFound, docs}`. Updates the
 * module-level `visitedMrns` map so each MRN is processed at most once across pages.
 *
 * @param {Object} data - Parsed grouped response.
 * @returns {{suspicous: number, total: number}} Suspicious MRNs found on this page and the
 *   number of groups on the page. The `suspicous` spelling is kept because the driver
 *   reads that exact key.
 */
var parseData = function (data) {
  var mrnGroup = data.grouped.mrn;
  var suspiciousMrns = [];
  for (var g = 0; g < mrnGroup.groups.length; g++) {
    var group = mrnGroup.groups[g];
    if (visitedMrns[group.groupValue]) {
      continue;
    }
    visitedMrns[group.groupValue] = true;
    // A single document has nothing to disagree with.
    if (group.doclist.numFound <= 1) {
      continue;
    }
    var docs = group.doclist.docs;
    // count is the group's numFound. It used to be incremented again for every returned
    // document, which roughly doubled it.
    var docGroups = { count: group.doclist.numFound, groups: {} };
    for (var d = 0; d < docs.length; d++) {
      var doc = docs[d];
      docGroups.patient = docGroups.patient || {};
      docGroups.patient.mrn = getDocValue(doc, 'mrn');
      // Bucket this document under groups[field][value] for every compared field it has.
      for (var t = 0; t < docGroupTypes.length; t++) {
        var type = docGroupTypes[t];
        var value = getDocValue(doc, type);
        if (value) {
          var tg = docGroups.groups[type] || {};
          var sg = tg[value] || [];
          sg.push({ mrn: docGroups.patient.mrn, date: getDocValue(doc, 'date'), value: value, key: doc.key, shard: doc['[shard]'] });
          tg[value] = sg;
          docGroups.groups[type] = tg;
        }
      }
      // A document whose shard name contains 'Patient' becomes the patient record itself.
      if (doc['[shard]'].indexOf('Patient') >= 0) {
        docGroups.patient = doc;
      }
    }
    // More than one distinct value for any field marks the MRN as suspicious.
    for (var dg in docGroups.groups) {
      if (Object.keys(docGroups.groups[dg]).length > 1) {
        suspiciousMrns.push(docGroups);
        break;
      }
    }
  }
  analyze(suspiciousMrns);
  return { suspicous: suspiciousMrns.length, total: mrnGroup.groups.length };
};
/**
 * Condenses each suspicious patient into a summary of the fields that have more than one
 * distinct value, then passes all summaries to `scoreSummaries`.
 *
 * @param {Array<Object>} suspiciousPatients - Items with `patient`, `count` and
 *   `groups[field][value] -> entries[]`, where each entry carries `shard`, `value`, `date`.
 */
var analyze = function (suspiciousPatients) {
  // Counts entries per core; the core name is the text after the last '/solr' plus one
  // separator character in the entry's shard string.
  var countByCore = function (entries) {
    var tally = {};
    entries.forEach(function (entry) {
      var core = entry.shard.substring(entry.shard.lastIndexOf('/solr') + 6);
      tally[core] = (tally[core] || 0) + 1;
    });
    return tally;
  };

  var summaries = suspiciousPatients.map(function (suspect) {
    var changes = {};
    Object.keys(suspect.groups).forEach(function (field) {
      var byValue = suspect.groups[field];
      var distinctValues = Object.keys(byValue);
      // Fields with a single value are consistent and are left out of the summary.
      if (distinctValues.length <= 1) {
        return;
      }
      changes[field] = distinctValues.map(function (text) {
        var entries = byValue[text];
        var perCore = countByCore(entries);
        // The first entry stands in for the whole bucket's value and date.
        var first = entries[0];
        return { count: entries.length, date: first.date, value: first.value, shards: perCore };
      });
    });
    return { patient: suspect.patient, docs: suspect.count, changes: changes };
  });

  scoreSummaries(summaries);
};
/**
 * Scores every summary in place via `scoreSummary`, then writes them all out with
 * `writeSummaries`.
 *
 * @param {Array<Object>} summaries - Summaries produced by `analyze`.
 */
var scoreSummaries = function (summaries) {
  summaries.forEach(function (entry) {
    scoreSummary(entry);
  });
  writeSummaries(summaries);
};
/**
 * Renders one summary as a CSV row:
 *   mrn, score, one quoted detail cell per entry of `docGroupTypes`, then a quoted list of
 *   the field names that changed.
 *
 * Each detail cell lists `value(count)` pairs separated by ', '. Double quotes inside a
 * quoted cell are doubled so a value containing '"' cannot break the row.
 *
 * @param {Object} summary - Has `patient.mrn`, `score` and `changes[field] -> [{value, count}]`.
 * @returns {string} The CSV row, without a trailing newline.
 */
var createCsv = function (summary) {
  var quote = function (text) {
    return '"' + String(text).replace(/"/g, '""') + '"';
  };
  var columns = [summary.patient.mrn, summary.score];
  var differences = [];
  for (var i = 0; i < docGroupTypes.length; i++) {
    var type = docGroupTypes[i];
    // Fields that did not change have no entry; they still get an empty cell.
    var changes = summary.changes[type] || [];
    var details = [];
    for (var x = 0; x < changes.length; x++) {
      details.push(changes[x].value + '(' + changes[x].count + ')');
    }
    if (changes.length > 0) {
      differences.push(type);
    }
    columns.push(quote(details.join(', ')));
  }
  columns.push(quote(differences.join(', ')));
  return columns.join(',');
};
var writeLines = function (lines) {
var chunk = lines.join('\n');
require('fs').appendFileSync('out.csv', chunk + '\n');
};
/**
 * Converts summaries to CSV rows with `createCsv` and writes them through `writeLines`
 * in batches of 20.
 *
 * The batch test uses the row count. It previously applied `%` to the array itself, which
 * coerces to NaN for real rows, so no batch was ever flushed early. The loop variable is
 * now declared locally instead of leaking as an implicit global.
 *
 * @param {Array<Object>} summaries - Scored summaries.
 */
var writeSummaries = function (summaries) {
  var BATCH_SIZE = 20;
  var lines = [];
  for (var s = 0; s < summaries.length; s++) {
    lines.push(createCsv(summaries[s]));
    if (lines.length === BATCH_SIZE) {
      writeLines(lines);
      lines = [];
    }
  }
  // Flush the remainder only when there is one, so no empty batch is written.
  if (lines.length > 0) {
    writeLines(lines);
  }
};
/**
 * Scores a gender disagreement: 1 when the second most frequent value occurs more than
 * 30% as often as the most frequent one, otherwise 0.
 *
 * The top two entries are tracked independently, so the result no longer depends on the
 * order of `changes`. Previously the runner-up was only recorded when a later entry became
 * the new maximum.
 *
 * @param {Array<{count: number}>} changes - One entry per distinct value.
 * @returns {number} 1 or 0.
 */
var scoreGenderChanges = function (changes) {
  var entries = changes || [];
  var most_likely = null;
  var next_likely = null;
  for (var g = 0; g < entries.length; g++) {
    var change = entries[g];
    if (!most_likely || change.count >= most_likely.count) {
      next_likely = most_likely;
      most_likely = change;
    } else if (!next_likely || change.count > next_likely.count) {
      next_likely = change;
    }
  }
  if (next_likely && next_likely.count / most_likely.count > 0.3) {
    return 1;
  }
  return 0;
};
/**
 * Scores a birth-date disagreement: +0.2 when more than two distinct years appear and
 * +0.5 when more than two distinct day parts appear.
 *
 * The thresholds compare the number of distinct keys. Previously the key array itself was
 * compared with 2, which coerces the array to a number and gave unrelated results.
 *
 * @param {Array<{value: *}>} changes - One entry per distinct birth value. Each value is
 *   split as first four characters = year, remainder = day part (assumes the value starts
 *   with a four-digit year — TODO confirm against the index schema).
 * @returns {number} 0, 0.2, 0.5 or their sum.
 */
var scoreBirthChanges = function (changes) {
  var entries = changes || [];
  var years = {};
  var days = {};
  for (var g = 0; g < entries.length; g++) {
    // Coerce so a non-string value cannot throw on substring.
    var birth = String(entries[g].value);
    var year = birth.substring(0, 4);
    var day = birth.substring(4);
    years[year] = (years[year] || 0) + 1;
    days[day] = (days[day] || 0) + 1;
  }
  var score = 0;
  if (Object.keys(years).length > 2) {
    score += 0.2;
  }
  if (Object.keys(days).length > 2) {
    score += 0.5;
  }
  return score;
};
/**
 * Computes a suspicion score for one summary and stores it on `summary.score`.
 *
 * The score is the sum of:
 *   - `scoreWordDistance` for every changed field,
 *   - `scoreGenderChanges` when gender changed,
 *   - `scoreBirthChanges` when birth changed (this now receives the birth entries; it was
 *     previously handed the gender entries),
 *   - 0.5 when both first and last name changed.
 *
 * @param {Object} summary - Has `changes[field] -> entries[]`; mutated to add `score`.
 */
var scoreSummary = function (summary) {
  var changes = summary.changes;
  var score = 0;
  var type;
  for (type in changes) {
    score += scoreWordDistance(changes[type]);
  }
  // If both first name and last name changed, increase the score.
  var nameFieldsChanged = 0;
  for (type in changes) {
    if (type == 'patientfirstname' || type == 'patientlastname') {
      nameFieldsChanged++;
    } else if (type == 'gender') {
      score += scoreGenderChanges(changes['gender']);
    } else if (type == 'birth') {
      score += scoreBirthChanges(changes['birth']);
    }
  }
  if (nameFieldsChanged == 2) {
    score += 0.5;
  }
  summary.score = score;
};
/**
 * Averages `distance` over every ordered pair of distinct entries.
 *
 * Both (a, b) and (b, a) are included because `distance` is not symmetric. Returns 0 when
 * there are fewer than two entries; the unguarded division would yield NaN and poison any
 * sum it is added to.
 *
 * @param {Array<{value: string}>} changes - One entry per distinct value.
 * @returns {number} Mean pairwise distance, or 0 when no pair exists.
 */
var scoreWordDistance = function (changes) {
  var entries = changes || [];
  var total_distance = 0;
  var num_distances = 0;
  for (var i = 0; i < entries.length; i++) {
    for (var j = 0; j < entries.length; j++) {
      if (i === j) {
        continue;
      }
      total_distance += distance(entries[i].value, entries[j].value);
      num_distances++;
    }
  }
  return num_distances === 0 ? 0 : total_distance / num_distances;
};
/**
 * Edit distance (insert / delete / substitute, each costing 1) between two strings,
 * divided by the length of `a`.
 *
 * When either string is empty the other string's length is returned as-is, without the
 * division.
 *
 * @param {string} a - Reference string; its length is the divisor.
 * @param {string} b - String compared against `a`.
 * @returns {number} Normalised edit distance.
 */
var distance = function (a, b) {
  if (a.length === 0) {
    return b.length;
  }
  if (b.length === 0) {
    return a.length;
  }
  // Only the previous row of the edit-distance table is needed to compute the next one.
  var previousRow = [];
  var col;
  for (col = 0; col <= a.length; col++) {
    previousRow[col] = col;
  }
  for (var row = 1; row <= b.length; row++) {
    var currentRow = [row];
    for (col = 1; col <= a.length; col++) {
      if (b.charAt(row - 1) === a.charAt(col - 1)) {
        currentRow[col] = previousRow[col - 1];
      } else {
        // Cheapest of substitution, insertion and deletion, plus one step.
        currentRow[col] = 1 + Math.min(previousRow[col - 1], currentRow[col - 1], previousRow[col]);
      }
    }
    previousRow = currentRow;
  }
  return previousRow[a.length] / a.length;
};
// Driver: requests every page at once, feeds each response to parseData, and prints the
// overall tally when the last outstanding page has settled.
var waiting = 0;
var suspiciousCount = 0;
// Total number of groups returned across pages. The requested page size is no longer added
// on top, which counted every page twice.
var totalDocs = 0;

// Marks one page as settled (successfully or not) and prints the summary after the last one.
var finishPage = function (page) {
  waiting--;
  console.log('Finished page ' + page);
  if (waiting == 0) {
    var visitedCount = Object.keys(visitedMrns).length;
    // Percentage with one decimal place. The ratio itself used to be printed next to the
    // '%' sign, so 5 of 100 showed as 0.05%.
    var percent = visitedCount === 0 ? 0 : Math.round(suspiciousCount / visitedCount * 1000) / 10;
    console.log('Found ' + suspiciousCount + ' of ' + visitedCount + ' patients. ~' + percent + '%');
  }
};

for (var i = 0; i < pages; i++) {
  // The IIFE captures the page number for the asynchronous callbacks.
  (function (page) {
    var query = formQuery(page, pageSize);
    console.log('Page: ' + page + ' Query: ' + query);
    console.log('');
    waiting++;
    http.get(query, function (res) {
      var result = '';
      // Decode at the stream level so a multi-byte character split across two chunks
      // is not corrupted.
      res.setEncoding('utf8');
      res.on('data', function (chunk) {
        result += chunk;
      });
      res.on('end', function () {
        var parsed;
        try {
          parsed = JSON.parse(result);
        } catch (err) {
          // A non-JSON body (e.g. an error page) must not stop the remaining pages.
          console.error('Page ' + page + ' returned invalid JSON: ' + err.message);
          finishPage(page);
          return;
        }
        var r = parseData(parsed);
        suspiciousCount = suspiciousCount + r.suspicous;
        totalDocs = totalDocs + r.total;
        finishPage(page);
      });
    }).on('error', function (err) {
      // Without this handler a failed connection raises an unhandled 'error' event.
      console.error('Page ' + page + ' request failed: ' + err.message);
      finishPage(page);
    });
  })(i);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment