taylordelehanty/postgres_v5_condition_era.sql

## postgres_v5_condition_era.sql
--------------------------------------------------------------------------------------------------------------
---Adapted to PostgreSQL condition_era from Pure SQL drug_era written by Chris_Knoll: https://gist.github.com/chrisknoll/c820cc12d833db2e3d1e
---Upgraded to v5 OMOP
---INTERVAL set to 30 days

---Chris Knoll's comments are after two dashes
---Taylor Delehanty's comments are after three dashes
--------------------------------------------------------------------------------------------------------------
TRUNCATE <schema>.condition_era;

WITH cteConditionTarget (condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_end_date) AS
(
	SELECT co.condition_occurrence_id, co.person_id, co.condition_concept_id, co.condition_start_date,
	       COALESCE(NULLIF(co.condition_end_date,NULL), condition_start_date + INTERVAL '1 day') AS condition_end_date
	---Format for FROM statement below is like this: "FROM <schema>.<table> co"
	FROM omop.condition_occurrence co
),
--------------------------------------------------------------------------------------------------------------
cteEndDates (person_id, condition_concept_id, end_date) AS -- the magic
(
	SELECT person_id, condition_concept_id, event_date - INTERVAL '30 days' AS end_date -- unpad the end date
	FROM
	(
		SELECT person_id, condition_concept_id, event_date, event_type,
		MAX(start_ordinal) OVER (PARTITION BY person_id, condition_concept_id ORDER BY event_date, event_type ROWS UNBOUNDED PRECEDING) AS start_ordinal, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
		ROW_NUMBER() OVER (PARTITION BY person_id, condition_concept_id ORDER BY event_date, event_type) AS overall_ord -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
		FROM
		(
			-- select the start dates, assigning a row number to each
			SELECT person_id, condition_concept_id, condition_start_date AS event_date, -1 AS event_type, ROW_NUMBER() OVER (PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date) AS start_ordinal
			FROM cteConditionTarget

			UNION ALL

			-- pad the end dates by 30 to allow a grace period for overlapping ranges.
			SELECT person_id, condition_concept_id, condition_end_date + INTERVAL '30 days', 1 AS event_type, NULL
			FROM cteConditionTarget
		) RAWDATA
	) e
	WHERE (2 * e.start_ordinal) - e.overall_ord = 0
),
--------------------------------------------------------------------------------------------------------------
cteConditionEnds (person_id, condition_concept_id, condition_start_date, era_end_date) AS
(
SELECT
        c.person_id,
	c.condition_concept_id,
	c.condition_start_date,
	MIN(e.end_date) AS era_end_date
FROM cteConditionTarget c
JOIN cteEndDates e ON c.person_id = e.person_id AND c.condition_concept_id = e.condition_concept_id AND e.end_date >= c.condition_start_date
GROUP BY
---In Chris's original condition_era code, condition_occurrence_id is not in this table, but when at SAFTINet we ran through our data, we found that our condition_occurrence_count was not matching the count of all of the condition_occurrences in the condition_occurrence table. So to solve that and keep the records of the same person_id, condition_concept_id, and start_date from getting overlooked and put into one row, we grouped them by condition_occurrence_id as well to keep them from getting lumped together and overlooked.
---Chris DOES have this type of GROUP BY in his drug_era code
        c.condition_occurrence_id,
	c.person_id,
	c.condition_concept_id,
	c.condition_start_date
)
--------------------------------------------------------------------------------------------------------------
---This INSERT INTO statement is similar to the FROM statement at the beginning. This is the format: "INSERT INTO <schema>.<table>(<fields>)"
INSERT INTO omop.condition_era(person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count)
SELECT person_id, condition_concept_id, MIN(condition_start_date) AS condition_era_start_date, era_end_date AS condition_era_end_date, COUNT(*) AS condition_occurrence_count
FROM cteConditionEnds
GROUP BY person_id, condition_concept_id, era_end_date
ORDER BY person_id, condition_concept_id
;
	--------------------------------------------------------------------------------------------------------------
	---Adapted to PostgreSQL condition_era from Pure SQL drug_era written by Chris_Knoll: https://gist.github.com/chrisknoll/c820cc12d833db2e3d1e
	---Upgraded to v5 OMOP
	---INTERVAL set to 30 days

	---Chris Knoll's comments are after two dashes
	---Taylor Delehanty's comments are after three dashes
	--------------------------------------------------------------------------------------------------------------
	TRUNCATE <schema>.condition_era;

	WITH cteConditionTarget (condition_occurrence_id, person_id, condition_concept_id, condition_start_date, condition_end_date) AS
	(
	SELECT co.condition_occurrence_id, co.person_id, co.condition_concept_id, co.condition_start_date,
	COALESCE(NULLIF(co.condition_end_date,NULL), condition_start_date + INTERVAL '1 day') AS condition_end_date
	---Format for FROM statement below is like this: "FROM <schema>.<table> co"
	FROM omop.condition_occurrence co
	),
	--------------------------------------------------------------------------------------------------------------
	cteEndDates (person_id, condition_concept_id, end_date) AS -- the magic
	(
	SELECT person_id, condition_concept_id, event_date - INTERVAL '30 days' AS end_date -- unpad the end date
	FROM
	(
	SELECT person_id, condition_concept_id, event_date, event_type,
	MAX(start_ordinal) OVER (PARTITION BY person_id, condition_concept_id ORDER BY event_date, event_type ROWS UNBOUNDED PRECEDING) AS start_ordinal, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
	ROW_NUMBER() OVER (PARTITION BY person_id, condition_concept_id ORDER BY event_date, event_type) AS overall_ord -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
	FROM
	(
	-- select the start dates, assigning a row number to each
	SELECT person_id, condition_concept_id, condition_start_date AS event_date, -1 AS event_type, ROW_NUMBER() OVER (PARTITION BY person_id, condition_concept_id ORDER BY condition_start_date) AS start_ordinal
	FROM cteConditionTarget

	UNION ALL

	-- pad the end dates by 30 to allow a grace period for overlapping ranges.
	SELECT person_id, condition_concept_id, condition_end_date + INTERVAL '30 days', 1 AS event_type, NULL
	FROM cteConditionTarget
	) RAWDATA
	) e
	WHERE (2 * e.start_ordinal) - e.overall_ord = 0
	),
	--------------------------------------------------------------------------------------------------------------
	cteConditionEnds (person_id, condition_concept_id, condition_start_date, era_end_date) AS
	(
	SELECT
	c.person_id,
	c.condition_concept_id,
	c.condition_start_date,
	MIN(e.end_date) AS era_end_date
	FROM cteConditionTarget c
	JOIN cteEndDates e ON c.person_id = e.person_id AND c.condition_concept_id = e.condition_concept_id AND e.end_date >= c.condition_start_date
	GROUP BY
	---In Chris's original condition_era code, condition_occurrence_id is not in this table, but when at SAFTINet we ran through our data, we found that our condition_occurrence_count was not matching the count of all of the condition_occurrences in the condition_occurrence table. So to solve that and keep the records of the same person_id, condition_concept_id, and start_date from getting overlooked and put into one row, we grouped them by condition_occurrence_id as well to keep them from getting lumped together and overlooked.
	---Chris DOES have this type of GROUP BY in his drug_era code
	c.condition_occurrence_id,
	c.person_id,
	c.condition_concept_id,
	c.condition_start_date
	)
	--------------------------------------------------------------------------------------------------------------
	---This INSERT INTO statement is similar to the FROM statement at the beginning. This is the format: "INSERT INTO <schema>.<table>(<fields>)"
	INSERT INTO omop.condition_era(person_id, condition_concept_id, condition_era_start_date, condition_era_end_date, condition_occurrence_count)
	SELECT person_id, condition_concept_id, MIN(condition_start_date) AS condition_era_start_date, era_end_date AS condition_era_end_date, COUNT(*) AS condition_occurrence_count
	FROM cteConditionEnds
	GROUP BY person_id, condition_concept_id, era_end_date
	ORDER BY person_id, condition_concept_id
	;