Instantly share code, notes, and snippets.

Embed
What would you like to do?
v5_CDM_populate_dose_era.sql
--------------------------------------------------------------------------------------------------------------
---Adapted to PostgreSQL v5 dose_era from Pure SQL drug_era written by Chris_Knoll: https://gist.github.com/chrisknoll/c820cc12d833db2e3d1e
---INTERVAL set to 30 days
---Chris Knoll's comments are after two dashes
---Taylor Delehanty's comments are after three dashes
---proper schema name needs to replace "<schema>" in the code
---<schema> needs to be replaced with actual schema where dose_era table is located
---<vocabulary_schema> needs to be replaced with actual schema where vocabularies and concepts are located
---works with dose_era_id being self-generated
--------------------------------------------------------------------------------------------------------------
TRUNCATE <schema>.dose_era;
WITH cteDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
(
SELECT
d.drug_exposure_id
, d.person_id
, c.concept_id AS ingredient_concept_id
, d.dose_unit_concept_id AS unit_concept_id
, d.effective_drug_dose AS dose_value
, d.drug_exposure_start_date
, d.days_supply AS days_supply
, COALESCE(NULLIF(drug_exposure_end_date, NULL), NULLIF(drug_exposure_start_date + (INTERVAL '1 day' * days_supply), drug_exposure_start_date), drug_exposure_start_date + INTERVAL '1 day') AS drug_exposure_end_date
FROM <schema>.drug_exposure d
JOIN <vocabulary_schema>.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id
JOIN <vocabulary_schema>.concept c ON ca.ancestor_concept_id = c.concept_id
WHERE c.vocabulary_id = 8
AND c.concept_class = 'Ingredient'
---AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set as 0 so we don't want different drugs wrapped together. We just look over them
---AND d.days_supply >= 0 ---There was actually data that had days_supply as < 0, which would result in us losing counts at the end because the end-date would be set as before the start date. This is a data-quality error, not a code error, and this AND statement just looks over the erroneous entries so they don't interfere with the rest of the code.
)
-----------------------------------------------------------------------------------------------------------------------------
, cteEndDates(person_id, ingredient_concept_id, unit_concept_id, dose_value, end_date) AS
(
SELECT
person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date - INTERVAL '30 days' AS end_date
FROM
(
SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date, event_type, MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal, ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type) AS overall_ord
FROM
(
SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date AS event_date, -1 AS event_type, ROW_NUMBER() OVER(PARTITION BY person_id, drug_concept_id, dose_unit_concept_id, effective_drug_dose ORDER BY drug_exposure_start_date) AS start_ordinal
FROM cteDrugTarget
UNION ALL
SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_end_date + INTERVAL '30 days', 1 AS event_type, NULL
FROM cteDrugTarget
) RAWDATA
) e
WHERE (2 * e.start_ordinal) - e.overall_ord = 0
)
-----------------------------------------------------------------------------------------------------------------------------
, cteDoseEraEnds(person_id, drug_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, dose_era_end_date) AS
( SELECT
dt.person_id
, dt.ingredient_concept_id
, dt.unit_concept_id
, dt.dose_value
, dt.drug_exposure_start_date
, MIN(e.end_date) AS era_end_date
FROM cteDrugTarget dt
JOIN cteEndDates e
ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND dt.unit_concept_id = e.unit_concept_id AND dt.dose_value = e.dose_value AND e.end_date >= dt.drug_exposure_start_date
GROUP BY
dt.drug_exposure_id
, dt.person_id
, dt.ingredient_concept_id
, dt.unit_concept_id
, dt.dose_value
, dt.drug_exposure_start_date
-----------------------------------------------------------------------------------------------------------------------------
INSERT INTO <schema>.dose_era(person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date)
SELECT person_id, drug_concept_id, unit_concept_id, dose_value, MIN(drug_exposure_start_date) AS dose_era_start_date, dose_era_end_date
GROUP BY person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_end_date
ORDER BY person_id, drug_concept_id
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment