Skip to content

Instantly share code, notes, and snippets.

@fredkingham
Created November 15, 2022 11:56
Show Gist options
  • Save fredkingham/f6fc3359a4249d5c70fb44914c6d81db to your computer and use it in GitHub Desktop.
Save fredkingham/f6fc3359a4249d5c70fb44914c6d81db to your computer and use it in GitHub Desktop.
Finds duplicates with the same hospital number and categorises whether they have elcid native data
from django.core.management.base import BaseCommand
from django.db.models import Count
from opal.core import subrecords
from opal.models import Patient
from elcid.models import Demographics
import logging
logger = logging.getLogger('intrahospital_api')
subrecordsToIgnore = set([
'InitialPatientLoad',
'Demographics',
'ContactInformation',
'NextOfKinDetails',
'GPDetails',
'PositiveBloodCultureHistory',
'ICUHandover',
'ExternalDemographics',
])
def find_duplicates():
"""
Returns the patients that are duplicates as a list of tuples
where the tuples looks like (from_patient, to_patient,)
ie the defunct patient and the patient to copy records to.
"""
duplicate_patients = []
unmergable = []
dup_dempgraphics = (
Demographics.objects.values("hospital_number")
.annotate(cnt=Count("id"))
.filter(cnt__gt=1)
)
duplicate_hns = [i["hospital_number"] for i in dup_dempgraphics if i["hospital_number"].strip()]
logger.info(f'Found {len(duplicate_hns)} duplicate hns with the same number')
for duplicate_hn in duplicate_hns:
patients = Patient.objects.filter(
demographics__hospital_number=duplicate_hn
)
if len(patients) > 2:
# In theory this is not wrong, in practice it does not happen, so if it does
# something unexpected has happened.
raise ValueError(f'We have found {len(patients)} for hn {duplicate_hn}')
patient_1 = patients[0]
patient_1_native_records = elcid_native_records(patient_1)
patient_2 = patients[1]
patient_2_native_records = elcid_native_records(patient_2)
if len(patient_1_native_records) and len(patient_2_native_records):
logger.info(f'====== Unable to merge hn {duplicate_hn} as they both have duplicate records')
logger.info(f'=== Patient 1 ({patient_1.id}) has:')
for record in patient_1_native_records:
logger.info(f'{record.__class__.__name__} {record.id}')
logger.info(f'=== Patient 2 ({patient_2.id}) has:')
for record in patient_2_native_records:
logger.info(f'{record.__class__.__name__} {record.id}')
unmergable.append((patient_1, patient_2,))
elif len(patient_1_native_records) and not len(patient_2_native_records):
duplicate_patients.append((patient_2, patient_1))
else:
duplicate_patients.append((patient_1, patient_2))
return duplicate_patients, unmergable
def elcid_native_records(patient):
result = []
for subrecord in subrecords.episode_subrecords():
if subrecord._is_singleton:
result.extend(subrecord.objects.filter(episode__patient_id=patient.id).exclude(updated=None))
else:
result.extend(subrecord.objects.filter(episode__patient_id=patient.id))
for subrecord in subrecords.patient_subrecords():
if subrecord.__name__ in subrecordsToIgnore:
continue
if subrecord._is_singleton:
result.extend(subrecord.objects.filter(patient_id=patient.id).exclude(updated=None))
else:
result.extend(subrecord.objects.filter(patient_id=patient.id))
return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment