taylordelehanty/postgres_v5_dose_era.sql

## postgres_v5_dose_era.sql
--------------------------------------------------------------------------------------------------------------
---Adapted to PostgreSQL v5 dose_era from Pure SQL drug_era written by Chris_Knoll: https://gist.github.com/chrisknoll/c820cc12d833db2e3d1e
---INTERVAL set to 30 days

---Chris Knoll's comments are after two dashes
---Taylor Delehanty's comments are after three dashes
---proper schema name needs to replace "<schema>" in the code
---<schema> needs to be replaced with actual schema where dose_era table is located
---<vocabulary_schema> needs to be replaced with actual schema where vocabularies and concepts are located
---works with dose_era_id being self-generated
--------------------------------------------------------------------------------------------------------------


TRUNCATE <schema>.dose_era;

WITH cteDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
(
	SELECT
		d.drug_exposure_id
		, d.person_id
		, c.concept_id AS ingredient_concept_id
		, d.dose_unit_concept_id AS unit_concept_id
		, d.effective_drug_dose AS dose_value
		, d.drug_exposure_start_date
		, d.days_supply AS days_supply
		, COALESCE(NULLIF(drug_exposure_end_date, NULL), NULLIF(drug_exposure_start_date + (INTERVAL '1 day' * days_supply), drug_exposure_start_date), drug_exposure_start_date + INTERVAL '1 day') AS drug_exposure_end_date
	FROM <schema>.drug_exposure d
	     JOIN <vocabulary_schema>.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id
	     JOIN <vocabulary_schema>.concept c ON ca.ancestor_concept_id = c.concept_id
	     WHERE c.vocabulary_id = 8
	     AND c.concept_class = 'Ingredient'
	     ---AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set as 0 so we don't want different drugs wrapped together. We just look over them
	     ---AND d.days_supply >= 0 ---There was actually data that had days_supply as < 0, which would result in us losing counts at the end because the end-date would be set as before the start date. This is a data-quality error, not a code error, and this AND statement just looks over the erroneous entries so they don't interfere with the rest of the code.
)
-----------------------------------------------------------------------------------------------------------------------------
, cteEndDates(person_id, ingredient_concept_id, unit_concept_id, dose_value, end_date) AS
(
	SELECT
		person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date - INTERVAL '30 days' AS end_date
	FROM
	(
		SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date, event_type, MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal, ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type) AS overall_ord
		FROM
		(
			SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date AS event_date, -1 AS event_type, ROW_NUMBER() OVER(PARTITION BY person_id, drug_concept_id, dose_unit_concept_id, effective_drug_dose ORDER BY drug_exposure_start_date) AS start_ordinal
			FROM cteDrugTarget

			UNION ALL

			SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_end_date + INTERVAL '30 days', 1 AS event_type, NULL
			FROM cteDrugTarget
		) RAWDATA
	) e
	WHERE (2 * e.start_ordinal) - e.overall_ord = 0
)
-----------------------------------------------------------------------------------------------------------------------------
, cteDoseEraEnds(person_id, drug_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, dose_era_end_date) AS
( SELECT
	dt.person_id
	, dt.ingredient_concept_id
	, dt.unit_concept_id
	, dt.dose_value
	, dt.drug_exposure_start_date
	, MIN(e.end_date) AS era_end_date
FROM cteDrugTarget dt
JOIN cteEndDates e
ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND dt.unit_concept_id = e.unit_concept_id AND dt.dose_value = e.dose_value AND e.end_date >= dt.drug_exposure_start_date
GROUP BY
	dt.drug_exposure_id
	, dt.person_id
	, dt.ingredient_concept_id
	, dt.unit_concept_id
	, dt.dose_value
	, dt.drug_exposure_start_date
-----------------------------------------------------------------------------------------------------------------------------
INSERT INTO <schema>.dose_era(person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date)
SELECT person_id, drug_concept_id, unit_concept_id, dose_value, MIN(drug_exposure_start_date) AS dose_era_start_date, dose_era_end_date
GROUP BY person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_end_date
ORDER BY person_id, drug_concept_id
;
	--------------------------------------------------------------------------------------------------------------
	---Adapted to PostgreSQL v5 dose_era from Pure SQL drug_era written by Chris_Knoll: https://gist.github.com/chrisknoll/c820cc12d833db2e3d1e
	---INTERVAL set to 30 days

	---Chris Knoll's comments are after two dashes
	---Taylor Delehanty's comments are after three dashes
	---proper schema name needs to replace "<schema>" in the code
	---<schema> needs to be replaced with actual schema where dose_era table is located
	---<vocabulary_schema> needs to be replaced with actual schema where vocabularies and concepts are located
	---works with dose_era_id being self-generated
	--------------------------------------------------------------------------------------------------------------


	TRUNCATE <schema>.dose_era;

	WITH cteDrugTarget(drug_exposure_id, person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, days_supply, drug_exposure_end_date) AS
	(
	SELECT
	d.drug_exposure_id
	, d.person_id
	, c.concept_id AS ingredient_concept_id
	, d.dose_unit_concept_id AS unit_concept_id
	, d.effective_drug_dose AS dose_value
	, d.drug_exposure_start_date
	, d.days_supply AS days_supply
	, COALESCE(NULLIF(drug_exposure_end_date, NULL), NULLIF(drug_exposure_start_date + (INTERVAL '1 day' * days_supply), drug_exposure_start_date), drug_exposure_start_date + INTERVAL '1 day') AS drug_exposure_end_date
	FROM <schema>.drug_exposure d
	JOIN <vocabulary_schema>.concept_ancestor ca ON ca.descendant_concept_id = d.drug_concept_id
	JOIN <vocabulary_schema>.concept c ON ca.ancestor_concept_id = c.concept_id
	WHERE c.vocabulary_id = 8
	AND c.concept_class = 'Ingredient'
	---AND d.drug_concept_id != 0 ---Our unmapped drug_concept_id's are set as 0 so we don't want different drugs wrapped together. We just look over them
	---AND d.days_supply >= 0 ---There was actually data that had days_supply as < 0, which would result in us losing counts at the end because the end-date would be set as before the start date. This is a data-quality error, not a code error, and this AND statement just looks over the erroneous entries so they don't interfere with the rest of the code.
	)
	-----------------------------------------------------------------------------------------------------------------------------
	, cteEndDates(person_id, ingredient_concept_id, unit_concept_id, dose_value, end_date) AS
	(
	SELECT
	person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date - INTERVAL '30 days' AS end_date
	FROM
	(
	SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, event_date, event_type, MAX(start_ordinal) OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type ROWS unbounded preceding) AS start_ordinal, ROW_NUMBER() OVER (PARTITION BY person_id, ingredient_concept_id, unit_concept_id, dose_value ORDER BY event_date, event_type) AS overall_ord
	FROM
	(
	SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_start_date AS event_date, -1 AS event_type, ROW_NUMBER() OVER(PARTITION BY person_id, drug_concept_id, dose_unit_concept_id, effective_drug_dose ORDER BY drug_exposure_start_date) AS start_ordinal
	FROM cteDrugTarget

	UNION ALL

	SELECT person_id, ingredient_concept_id, unit_concept_id, dose_value, drug_exposure_end_date + INTERVAL '30 days', 1 AS event_type, NULL
	FROM cteDrugTarget
	) RAWDATA
	) e
	WHERE (2 * e.start_ordinal) - e.overall_ord = 0
	)
	-----------------------------------------------------------------------------------------------------------------------------
	, cteDoseEraEnds(person_id, drug_concept_id, unit_concept_id, dose_value, drug_exposure_start_date, dose_era_end_date) AS
	( SELECT
	dt.person_id
	, dt.ingredient_concept_id
	, dt.unit_concept_id
	, dt.dose_value
	, dt.drug_exposure_start_date
	, MIN(e.end_date) AS era_end_date
	FROM cteDrugTarget dt
	JOIN cteEndDates e
	ON dt.person_id = e.person_id AND dt.ingredient_concept_id = e.ingredient_concept_id AND dt.unit_concept_id = e.unit_concept_id AND dt.dose_value = e.dose_value AND e.end_date >= dt.drug_exposure_start_date
	GROUP BY
	dt.drug_exposure_id
	, dt.person_id
	, dt.ingredient_concept_id
	, dt.unit_concept_id
	, dt.dose_value
	, dt.drug_exposure_start_date
	-----------------------------------------------------------------------------------------------------------------------------
	INSERT INTO <schema>.dose_era(person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_start_date, dose_era_end_date)
	SELECT person_id, drug_concept_id, unit_concept_id, dose_value, MIN(drug_exposure_start_date) AS dose_era_start_date, dose_era_end_date
	GROUP BY person_id, drug_concept_id, unit_concept_id, dose_value, dose_era_end_date
	ORDER BY person_id, drug_concept_id
	;