Skip to content

Instantly share code, notes, and snippets.

@callahantiff
Last active March 7, 2022 15:15
Show Gist options
  • Save callahantiff/b72339a8d6fb7de31e1912039947605a to your computer and use it in GitHub Desktop.
Save callahantiff/b72339a8d6fb7de31e1912039947605a to your computer and use it in GitHub Desktop.
PatientSimilarity: Exploring the Impact of different Entities and Domains for Rare Disease Phenotyping.
-- RARE DISEASE - RANDOM PATIENTS: Query is designed to retrieve on 10,000 random patients
-- The query seraches only among patients having >9 visits
-- Lasted edited on: 05/04/2018
SELECT v.person_id, count(v.visit_occurrence_id) AS count, p_dat.cond, p_dat.cond_num
FROM CHCO_DeID_Apr2018.visit_occurrence v
RIGHT JOIN
(SELECT person_id, 'Rand' AS cond, 99 AS cond_num FROM CHCO_DeID_Apr2018.person
WHERE person_id NOT IN
(SELECT v.person_id
FROM CHCO_DeID_Apr2018.visit_occurrence v
RIGHT JOIN
(SELECT * FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p_dat
ON v.person_id = p_dat.person_id
GROUP BY v.person_id
HAVING count(v.visit_occurrence_id) > 9)
ORDER BY RAND()) AS p_dat
ON v.person_id = p_dat.person_id
WHERE v.person_id IS NOT null AND v.visit_occurrence_id IS NOT null
GROUP BY v.person_id, p_dat.cond, p_dat.cond_num
HAVING count > 9
ORDER BY count
LIMIT 11000;
-- CONDITIONS: Query is designed to retrieve condition information on 10,000 random patients
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id,
m.condition_concept_id AS omop_cond_id,
c1.concept_name AS source_cond_label,
m.condition_source_value AS condition_source_value,
m.condition_start_date AS cond_date,
m.visit_occurrence_id AS visit_id,
v.visit_start_date AS visit_date
FROM CHCO_DeID_Apr2018.condition_occurrence m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.condition_concept_id
JOIN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) AS p2
ON m.person_id = p2.person_id
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE
c1.domain_id = 'Condition'
AND c1.vocabulary_id = 'SNOMED'
AND m.condition_concept_id != 0
order by m.person_id, m.condition_start_date)
UNION ALL
(SELECT m.person_id,
m.condition_concept_id AS omop_cond_id,
'NONE' as source_cond_label,
m.condition_source_value AS condition_source_value,
m.condition_start_date AS cond_date,
m.visit_occurrence_id AS visit_id,
v.visit_start_date AS visit_date
FROM CHCO_DeID_Apr2018.condition_occurrence m
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
JOIN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2) AS p2
ON m.person_id = p2.person_id
where m.condition_concept_id != 0
order by m.person_id, m.condition_start_date)
ORDER BY person_id, visit_date;
-- DEMOGRAPHICS: Query is designed to retrieve demographic information on 10,000 random patients
-- Lasted edited on: 05/04/2018
SELECT
p.person_id,
p.birth_datetime,
c1.concept_name AS gender,
c2.concept_name AS ethnicity,
c3.concept_name AS race,
p.care_site_id,
p.location_id
FROM CHCO_DeID_Apr2018.person p
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = p.gender_concept_id
JOIN CHCO_DeID_Apr2018.concept c2
ON c2.concept_id = p.ethnicity_concept_id
JOIN CHCO_DeID_Apr2018.concept c3
ON c3.concept_id = p.race_concept_id
WHERE person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2)
GROUP BY p.person_id, p.birth_datetime, gender, ethnicity, race, p.care_site_id, p.location_id;
-- MEASUREMENTS: Query is designed to retrieve lab information on 10,000 random patients
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id AS pat_id,
m.measurement_concept_id AS omop_cond_id, --OMOP concept id
c1.concept_name as source_cond_label,
m.measurement_source_value AS measurement_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.measurement m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.measurement_concept_id
join CHCO_DeID_Apr2018.visit_occurrence v
on v.visit_occurrence_id = m.visit_occurrence_id
WHERE c1.domain_id = 'Measurement'
AND c1.vocabulary_id = 'LOINC'
AND m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2)
AND m.measurement_concept_id != 0
order by visit_date)
UNION ALL
(SELECT m.person_id AS pat_id,
m.measurement_concept_id AS omop_cond_id, --OMOP concept id
'NONE' as source_cond_label,
m.measurement_source_value AS measurement_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.measurement m
join CHCO_DeID_Apr2018.visit_occurrence v
on v.visit_occurrence_id = m.visit_occurrence_id
WHERE m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2)
AND m.measurement_concept_id != 0
order by visit_date);
-- MEDICATIONS: Query is designed to retrieve medication information on 10,000 random patients
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id AS pat_id,
m.drug_concept_id AS omop_cond_id, --OMOP concept id
c1.concept_name AS source_cond_label,
m.drug_source_value AS drug_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.drug_exposure m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.drug_concept_id
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE c1.domain_id = 'Drug'
AND c1.vocabulary_id = 'RxNorm'
AND m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2)
AND m.drug_concept_id != 0
order by visit_date)
UNION ALL
(SELECT m.person_id AS pat_id,
m.drug_concept_id AS omop_cond_id, --OMOP concept id
'NONE' AS source_cond_label,
m.drug_source_value AS drug_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.drug_exposure m
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Random2)
AND m.drug_concept_id != 0
order by visit_date);
-- RARE DISEASE CASES: Query is designed to retrieve test-cases having specific concept codes indicative of CF, SC, CH, and PKU
-- The query seraches only among patients having >9 visits
-- Lasted edited on: 05/04/2018
SELECT
v.person_id,
count(v.visit_occurrence_id) AS count,
p_dat.cond,
p_dat.cond_num,
p_dat.cond_start,
p_dat.age_at_dx
FROM CHCO_DeID_Apr2018.visit_occurrence v
JOIN CHCO_DeID_Apr2018.person d
ON d.person_id = v.person_id
RIGHT JOIN
(SELECT * FROM
(SELECT co.person_id,'PKU' AS cond, 0 AS cond_num,
min(co.condition_start_date) AS cond_start,
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx
FROM CHCO_DeID_Apr2018.condition_occurrence co
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = co.condition_concept_id
JOIN CHCO_DeID_Apr2018.person p
ON p.person_id = co.person_id
WHERE
c1.vocabulary_id = 'SNOMED'
AND c1.domain_id = 'Condition'
AND co.condition_concept_id IN (432872)
GROUP BY co.person_id, cond, cond_num, p.birth_datetime
ORDER BY co.person_id)
UNION ALL
(SELECT co.person_id,'CH' AS cond, 1 AS cond_num,
min(co.condition_start_date) AS cond_start,
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx
FROM CHCO_DeID_Apr2018.condition_occurrence co
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = co.condition_concept_id
JOIN CHCO_DeID_Apr2018.person p
ON p.person_id = co.person_id
WHERE
c1.vocabulary_id = 'SNOMED'
AND c1.domain_id = 'Condition'
AND co.condition_concept_id IN (4314093, 4130017, 4081998)
GROUP BY co.person_id, cond, cond_num, p.birth_datetime
ORDER BY co.person_id)
UNION ALL
(SELECT co.person_id,'SC' AS cond, 3 AS cond_num,
min(co.condition_start_date) AS cond_start,
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx
FROM CHCO_DeID_Apr2018.condition_occurrence co
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = co.condition_concept_id
JOIN CHCO_DeID_Apr2018.person p
ON p.person_id = co.person_id
WHERE
c1.vocabulary_id = 'SNOMED'
AND c1.domain_id = 'Condition'
AND co.condition_concept_id IN (30683, 22281, 443726, 26942, 196943, 443721, 254062, 25518, 321263, 443738, 40485018)
GROUP BY co.person_id, cond, cond_num, p.birth_datetime
ORDER BY co.person_id)
UNION ALL
(SELECT co.person_id,'CF' AS cond, 4 AS cond_num,
min(co.condition_start_date) AS cond_start,
DATE_DIFF(min(co.condition_start_date), CAST(DATE(p.birth_datetime) AS DATE), DAY)/365.25 AS age_at_dx
FROM CHCO_DeID_Apr2018.condition_occurrence co
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = co.condition_concept_id
JOIN CHCO_DeID_Apr2018.person p
ON p.person_id = co.person_id
WHERE
c1.vocabulary_id = 'SNOMED'
AND c1.domain_id = 'Condition'
AND co.condition_concept_id IN (254320, 194325, 441267, 434615, 193174, 40479565)
GROUP BY co.person_id, cond, cond_num, p.birth_datetime
ORDER BY co.person_id)) AS p_dat
ON v.person_id = p_dat.person_id
GROUP BY v.person_id, p_dat.cond, p_dat.cond_num, p_dat.cond_start, p_dat.age_at_dx
HAVING count > 9
ORDER BY count;
-- CONDITIONS: Query is designed to retrieve condition information on Rare Disease Cases
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id,
m.condition_concept_id AS omop_cond_id,
c1.concept_name AS source_cond_label,
m.condition_source_value AS condition_source_value,
p2.cond_start,
m.condition_start_date AS cond_date,
m.visit_occurrence_id AS visit_id,
v.visit_start_date AS visit_date
FROM CHCO_DeID_Apr2018.condition_occurrence m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.condition_concept_id
JOIN (SELECT person_id, cond_start FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p2
ON m.person_id = p2.person_id
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE
c1.domain_id = 'Condition'
AND c1.vocabulary_id = 'SNOMED'
order by m.person_id, m.condition_start_date)
UNION ALL
(SELECT m.person_id,
m.condition_concept_id AS omop_cond_id,
'NONE' as source_cond_label,
m.condition_source_value AS condition_source_value,
p2.cond_start,
m.condition_start_date AS cond_date,
m.visit_occurrence_id AS visit_id,
v.visit_start_date AS visit_date
FROM CHCO_DeID_Apr2018.condition_occurrence m
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
JOIN (SELECT person_id, cond_start FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2) AS p2
ON m.person_id = p2.person_id
order by m.person_id, m.condition_start_date)
ORDER BY person_id, visit_date;
-- DEMOGRAPHICS: Query is designed to retirve demographic information on Rare Disease Patients
-- Lasted edited on: 05/04/2018
SELECT
p.person_id,
p.birth_datetime,
c1.concept_name AS gender,
c2.concept_name AS ethnicity,
c3.concept_name AS race,
p.care_site_id,
p.location_id
FROM CHCO_DeID_Apr2018.person p
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = p.gender_concept_id
JOIN CHCO_DeID_Apr2018.concept c2
ON c2.concept_id = p.ethnicity_concept_id
JOIN CHCO_DeID_Apr2018.concept c3
ON c3.concept_id = p.race_concept_id
WHERE
person_id IN (SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2)
GROUP BY p.person_id, p.birth_datetime, gender, ethnicity, race, p.care_site_id, p.location_id;
-- MEDICATIONS: Query is designed to retrieve lab information on Rare Disease Cases
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id AS pat_id,
m.measurement_concept_id AS omop_cond_id, --OMOP concept id
c1.concept_name as source_cond_label,
m.measurement_source_value AS measurement_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.measurement m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.measurement_concept_id
join CHCO_DeID_Apr2018.visit_occurrence v
on v.visit_occurrence_id = m.visit_occurrence_id
WHERE c1.domain_id = 'Measurement'
AND c1.vocabulary_id = 'LOINC'
AND m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2)
order by visit_date)
UNION ALL
(SELECT m.person_id AS pat_id,
m.measurement_concept_id AS omop_cond_id, --OMOP concept id
'NONE' as source_cond_label,
m.measurement_source_value AS measurement_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.measurement m
join CHCO_DeID_Apr2018.visit_occurrence v
on v.visit_occurrence_id = m.visit_occurrence_id
WHERE m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2)
order by visit_date);
-- MEDICATIONS: Query is designed to retrieve medication information on Rare Disease Cases
-- The query returns all concepts and raw source values
-- Lasted edited on: 05/24/2018
SELECT * FROM
(SELECT m.person_id AS pat_id,
m.drug_concept_id AS omop_cond_id, --OMOP concept id
c1.concept_name AS source_cond_label,
m.drug_source_value AS drug_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.drug_exposure m
JOIN CHCO_DeID_Apr2018.concept c1
ON c1.concept_id = m.drug_concept_id
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE c1.domain_id = 'Drug'
AND c1.vocabulary_id = 'RxNorm'
AND m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2)
order by visit_date)
UNION ALL
(SELECT m.person_id AS pat_id,
m.drug_concept_id AS omop_cond_id, --OMOP concept id
'NONE' AS source_cond_label,
m.drug_source_value AS drug_source_value,
m.visit_occurrence_id AS visit_id,
v.visit_start_date as visit_date
FROM CHCO_DeID_Apr2018.drug_exposure m
JOIN CHCO_DeID_Apr2018.visit_occurrence v
ON m.visit_occurrence_id = v.visit_occurrence_id
WHERE
m.person_id IN
(SELECT person_id FROM CHCO_DeID_Apr2018.PatSim_RareDisease_Cases2)
order by visit_date);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment