chrisknoll/drugEraBuilder_demo

## drugEraBuilder_demo
/*
Drug era build logic:
Christopher Knoll
Janssen R&D

PURPOSE:
This SQL example demonstrates how drug exposure events in the CDM database can be rolled up into eras by arranging
each drug exposure start_date a row_number() and lining it up with the overall row_number for all start_date and end_dates.
When the row_number() of an overall date = 2 * the row_number of a start, we have a 'closed era'.

Example:

Consider the 2 overlapping date ranges:
1/1/2010	1/15/2010
1/12/2010	1/30/2010

By arranging the start dates in order with a row_number, and unioning this result with all end dates, you are left with the following:
EVENT_DATE	START_ORD	EVENT_TYPE
1/1/2010	1		-1
1/12/2010	2		-1
1/15/2010	null		1
1/30/2010	null		1

(Note: we add an EVENT_TYPE column so that EVENT_DATES that occur on the same day will have the ordinal of the start appear first before the end)

By applying a row_number() to these rows:
EVENT_DATE	START_ORD	OVERALL_ORD	EVENT_TYPE
1/1/2010	1		1		-1
1/12/2010	2		2		-1
1/15/2010	null		3		1
1/30/2010	null		4		1

By applying a MAX(START_ORD) OVER (ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) to this set, we can 'fill down' the start_ord columns that have nulls:

EVENT_DATE	START_ORD	OVERALL_ORD	EVENT_TYPE
1/1/2010	1		1		-1
1/12/2010	2		2		-1
1/15/2010	2		3		1
1/30/2010	2		4		1

From here we can see that where 2*START_ORD = OVERALL_ORD we have the date where all starts that have opened have been closed.
To get the final ERA, You find all the EVENT_DATEs where 2*START_ORD = OVERALL_ORD, (these are era end dates called ERA_END)
and the drug_exposure era_end is the first ERA_END that occurs after the drug_exposure start date.  This is accomplished in a GROUP BY
clause (see SQL below).

Since the only end date identified was 1/3/2010, this logic returns this rowset:
START_DATE	ERA_END
1/1/2010	1/30/2010
1/12/2010	1/30/2010

Finally (not shown in this demo), if you wanted to reduce all events to their era_start and era_end, you would take the
MIN(START_DATE) as ERA_START,  GROUP BY (ERA_END) to get the following result:
ERA_START	ERA_END
1/1/2010	1/30/2010

The SQL below shows the steps up to finding each drug exposure's ERA_END_DATE but does not do the final group-by to calculate the start_era.

*/

-- DROP TABLE #DRUG_EXPOSURE;

CREATE TABLE #DRUG_EXPOSURE
(
	[DRUG_EXPOSURE_ID] [bigint] NOT NULL,
	[PERSON_ID] [bigint] NOT NULL,
	[DRUG_CONCEPT_ID] [int] NOT NULL,
	[DRUG_EXPOSURE_START_DATE] [date] NOT NULL,
	[DRUG_TYPE_CONCEPT_ID] [int] NOT NULL,
	[REFILLS] [int] NULL,
	[QUANTITY] [int] NULL,
	[DAYS_SUPPLY] [int] NULL,
	[DRUG_EXPOSURE_END_DATE] [date] NULL
) ON [PRIMARY]

GO

-- Person 1000000
INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000000,'1/1/2010','1/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000000,'1/1/2010','1/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000000,'1/15/2010','1/20/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000000,'2/14/2010','2/20/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000000,'1/13/2010',null,10,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000001,'1/25/2010','2/3/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000002,'4/1/2010','5/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000003,'4/10/2010','4/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000004,'5/13/2010',null,15,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000000,2000005,'9/1/2010',null,10,3000000,1,1);

-- Person 1000001:
INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000000,'1/1/2010','1/05/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000001,'1/21/2010',null,10,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000002,'4/1/2010','4/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000003,'8/1/2010',null,30,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000004,'10/1/2010','10/5/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000005,'10/3/2010','10/15/2010',null,3000000,1,1);

INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
VALUES (0,1000001,2000006,'11/30/2010',null,5,3000000,1,1);

with T as
(
	select *, ROW_NUMBER() OVER (order by DRUG_EXPOSURE_ID) as rn
	from #DRUG_EXPOSURE
)
UPDATE T set DRUG_EXPOSURE_ID = rn;

-- here is the raw exposure data
select * from #DRUG_EXPOSURE;

-- here we show how we use the row_number for each START_DATE and row_number for all dates.
-- Note that when 2 * E.START_ORDINAL - E.OVERALL_ORD = 0, then we have matched all starts with ends to this point, and this would conclude an 'era'

-- In addition: we are applying a 30 day 'gap window' that if a start date begins within 30 days of an end date, it shoudl be
-- considered part of the same era.  To do this,we just add 30 days to all end dates to push them out, and then when we find
-- all the dates that are era ends, we subtract the 30 days back.

with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
(
-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
		COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
		d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
		ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
)
select E.*, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
FROM
(
	select PERSON_ID, EVENT_DATE, EVENT_TYPE,
	MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
	ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
	from
	(
		-- select the start dates, assigning a row number to each
		Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, -1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
		from cteDrugTarget

		UNION ALL

		-- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges.
		select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), 1 as EVENT_TYPE, NULL
		FROM cteDrugTarget
	) RAWDATA
) E
WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0


-- pretend that the data in #DRUG_EXPOSURE is the set or rows that match a certain descent concept ID.

with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
(
-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
		COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
		d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
		ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
),
cteEndDates (PERSON_ID, END_DATE) as -- the magic
(
	select PERSON_ID, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
	FROM
	(
		select PERSON_ID, EVENT_DATE, EVENT_TYPE,
		MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
		ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
		from
		(
			-- select the start dates, assigning a row number to each
			Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, -1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
			from cteDrugTarget

			UNION ALL

			-- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges.
			select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), 1 as EVENT_TYPE, NULL
			FROM cteDrugTarget
		) RAWDATA
	) E
	WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0
)

select * from cteEndDates;

-- now bring it all together by joining the calculated end dates back to the #drugExposure table, and the row's exposure's era end date will be the MIN end date that is >= the exposure date.

with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
(
-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
		COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
		d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
		ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
),
cteEndDates (PERSON_ID, END_DATE) as -- the magic
(
	select PERSON_ID, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
	FROM
	(
		select PERSON_ID, EVENT_DATE, EVENT_TYPE,
		MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
		ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
		from
		(
			-- select the start dates, assigning a row number to each
			Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, 1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
			from cteDrugTarget

			UNION ALL

			-- pad the end dates by 30 to allow a grace period for overlapping ranges.
			select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), -1 as EVENT_TYPE, NULL
			FROM cteDrugTarget
		) RAWDATA
	) E
	WHERE (2 * E.START_ORDINAL) - E.OVERALL_ORD = 0
)
select d.DRUG_EXPOSURE_ID,
	d.PERSON_ID,
	d.DRUG_CONCEPT_ID,
	d.DRUG_EXPOSURE_START_DATE,
	d.DRUG_EXPOSURE_END_DATE,
	MIN(e.END_DATE) as ERA_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID,
	d.REFILLS,
	d.QUANTITY,
	d.DAYS_SUPPLY,
	d.RowNum
FROM cteDrugTarget d
JOIN cteEndDates e on d.PERSON_ID = e.PERSON_ID and e.END_DATE >= d.DRUG_EXPOSURE_START_DATE
GROUP BY d.DRUG_EXPOSURE_ID,
	d.PERSON_ID,
	d.DRUG_CONCEPT_ID,
	d.DRUG_EXPOSURE_START_DATE,
	d.DRUG_EXPOSURE_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID,
	d.REFILLS,
	d.QUANTITY,
	d.DAYS_SUPPLY,
	d.RowNum
;
	/*
	Drug era build logic:
	Christopher Knoll
	Janssen R&D

	PURPOSE:
	This SQL example demonstrates how drug exposure events in the CDM database can be rolled up into eras by arranging
	each drug exposure start_date a row_number() and lining it up with the overall row_number for all start_date and end_dates.
	When the row_number() of an overall date = 2 * the row_number of a start, we have a 'closed era'.

	Example:

	Consider the 2 overlapping date ranges:
	1/1/2010 1/15/2010
	1/12/2010 1/30/2010

	By arranging the start dates in order with a row_number, and unioning this result with all end dates, you are left with the following:
	EVENT_DATE START_ORD EVENT_TYPE
	1/1/2010 1 -1
	1/12/2010 2 -1
	1/15/2010 null 1
	1/30/2010 null 1

	(Note: we add an EVENT_TYPE column so that EVENT_DATES that occur on the same day will have the ordinal of the start appear first before the end)

	By applying a row_number() to these rows:
	EVENT_DATE START_ORD OVERALL_ORD EVENT_TYPE
	1/1/2010 1 1 -1
	1/12/2010 2 2 -1
	1/15/2010 null 3 1
	1/30/2010 null 4 1

	By applying a MAX(START_ORD) OVER (ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) to this set, we can 'fill down' the start_ord columns that have nulls:

	EVENT_DATE START_ORD OVERALL_ORD EVENT_TYPE
	1/1/2010 1 1 -1
	1/12/2010 2 2 -1
	1/15/2010 2 3 1
	1/30/2010 2 4 1

	From here we can see that where 2*START_ORD = OVERALL_ORD we have the date where all starts that have opened have been closed.
	To get the final ERA, You find all the EVENT_DATEs where 2*START_ORD = OVERALL_ORD, (these are era end dates called ERA_END)
	and the drug_exposure era_end is the first ERA_END that occurs after the drug_exposure start date. This is accomplished in a GROUP BY
	clause (see SQL below).

	Since the only end date identified was 1/3/2010, this logic returns this rowset:
	START_DATE ERA_END
	1/1/2010 1/30/2010
	1/12/2010 1/30/2010

	Finally (not shown in this demo), if you wanted to reduce all events to their era_start and era_end, you would take the
	MIN(START_DATE) as ERA_START, GROUP BY (ERA_END) to get the following result:
	ERA_START ERA_END
	1/1/2010 1/30/2010

	The SQL below shows the steps up to finding each drug exposure's ERA_END_DATE but does not do the final group-by to calculate the start_era.

	*/

	-- DROP TABLE #DRUG_EXPOSURE;

	CREATE TABLE #DRUG_EXPOSURE
	(
	[DRUG_EXPOSURE_ID] [bigint] NOT NULL,
	[PERSON_ID] [bigint] NOT NULL,
	[DRUG_CONCEPT_ID] [int] NOT NULL,
	[DRUG_EXPOSURE_START_DATE] [date] NOT NULL,
	[DRUG_TYPE_CONCEPT_ID] [int] NOT NULL,
	[REFILLS] [int] NULL,
	[QUANTITY] [int] NULL,
	[DAYS_SUPPLY] [int] NULL,
	[DRUG_EXPOSURE_END_DATE] [date] NULL
	) ON [PRIMARY]

	GO

	-- Person 1000000
	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000000,'1/1/2010','1/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000000,'1/1/2010','1/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000000,'1/15/2010','1/20/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000000,'2/14/2010','2/20/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000000,'1/13/2010',null,10,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000001,'1/25/2010','2/3/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000002,'4/1/2010','5/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000003,'4/10/2010','4/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000004,'5/13/2010',null,15,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000000,2000005,'9/1/2010',null,10,3000000,1,1);

	-- Person 1000001:
	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000000,'1/1/2010','1/05/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000001,'1/21/2010',null,10,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000002,'4/1/2010','4/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000003,'8/1/2010',null,30,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000004,'10/1/2010','10/5/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000005,'10/3/2010','10/15/2010',null,3000000,1,1);

	INSERT INTO #DRUG_EXPOSURE ([DRUG_EXPOSURE_ID],[PERSON_ID],[DRUG_CONCEPT_ID],[DRUG_EXPOSURE_START_DATE],[DRUG_EXPOSURE_END_DATE],[DAYS_SUPPLY],[DRUG_TYPE_CONCEPT_ID],[REFILLS],[QUANTITY])
	VALUES (0,1000001,2000006,'11/30/2010',null,5,3000000,1,1);

	with T as
	(
	select *, ROW_NUMBER() OVER (order by DRUG_EXPOSURE_ID) as rn
	from #DRUG_EXPOSURE
	)
	UPDATE T set DRUG_EXPOSURE_ID = rn;

	-- here is the raw exposure data
	select * from #DRUG_EXPOSURE;

	-- here we show how we use the row_number for each START_DATE and row_number for all dates.
	-- Note that when 2 * E.START_ORDINAL - E.OVERALL_ORD = 0, then we have matched all starts with ends to this point, and this would conclude an 'era'

	-- In addition: we are applying a 30 day 'gap window' that if a start date begins within 30 days of an end date, it shoudl be
	-- considered part of the same era. To do this,we just add 30 days to all end dates to push them out, and then when we find
	-- all the dates that are era ends, we subtract the 30 days back.

	with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
	(
	-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
	COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
	ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
	)
	select E.*, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
	FROM
	(
	select PERSON_ID, EVENT_DATE, EVENT_TYPE,
	MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
	ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
	from
	(
	-- select the start dates, assigning a row number to each
	Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, -1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
	from cteDrugTarget

	UNION ALL

	-- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges.
	select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), 1 as EVENT_TYPE, NULL
	FROM cteDrugTarget
	) RAWDATA
	) E
	WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0


	-- pretend that the data in #DRUG_EXPOSURE is the set or rows that match a certain descent concept ID.

	with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
	(
	-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
	COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
	ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
	),
	cteEndDates (PERSON_ID, END_DATE) as -- the magic
	(
	select PERSON_ID, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
	FROM
	(
	select PERSON_ID, EVENT_DATE, EVENT_TYPE,
	MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
	ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
	from
	(
	-- select the start dates, assigning a row number to each
	Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, -1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
	from cteDrugTarget

	UNION ALL

	-- add the end dates with NULL as the row number, padding the end dates by 30 to allow a grace period for overlapping ranges.
	select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), 1 as EVENT_TYPE, NULL
	FROM cteDrugTarget
	) RAWDATA
	) E
	WHERE 2 * E.START_ORDINAL - E.OVERALL_ORD = 0
	)

	select * from cteEndDates;

	-- now bring it all together by joining the calculated end dates back to the #drugExposure table, and the row's exposure's era end date will be the MIN end date that is >= the exposure date.

	with cteDrugTarget (DRUG_EXPOSURE_ID, PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE, DRUG_EXPOSURE_END_DATE, DRUG_TYPE_CONCEPT_ID, REFILLS, QUANTITY, DAYS_SUPPLY, RowNum) as
	(
	-- Normalize DRUG_EXPOSURE_END_DATE to either the existing drug exposure end date, or add days supply, or add 1 day to the start date
	select d.DRUG_EXPOSURE_ID, d. PERSON_ID, DRUG_CONCEPT_ID, DRUG_EXPOSURE_START_DATE,
	COALESCE(DRUG_EXPOSURE_END_DATE, DATEADD(day,DAYS_SUPPLY,DRUG_EXPOSURE_START_DATE), DATEADD(day,1,DRUG_EXPOSURE_START_DATE)) as DRUG_EXPOSURE_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID, d.REFILLS, d.QUANTITY, d.DAYS_SUPPLY,
	ROW_NUMBER() OVER (PARTITION BY d.PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as RowNum
	FROM #DRUG_EXPOSURE d
	),
	cteEndDates (PERSON_ID, END_DATE) as -- the magic
	(
	select PERSON_ID, DATEADD(day,-30,EVENT_DATE) as END_DATE -- unpad the end date
	FROM
	(
	select PERSON_ID, EVENT_DATE, EVENT_TYPE,
	MAX(START_ORDINAL) OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE ROWS UNBOUNDED PRECEDING) as START_ORDINAL, -- this pulls the current START down from the prior rows so that the NULLs from the END DATES will contain a value we can compare with
	ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY EVENT_DATE, EVENT_TYPE) AS OVERALL_ORD -- this re-numbers the inner UNION so all rows are numbered ordered by the event date
	from
	(
	-- select the start dates, assigning a row number to each
	Select PERSON_ID, DRUG_EXPOSURE_START_DATE AS EVENT_DATE, 1 as EVENT_TYPE, ROW_NUMBER() OVER (PARTITION BY PERSON_ID ORDER BY DRUG_EXPOSURE_START_DATE) as START_ORDINAL
	from cteDrugTarget

	UNION ALL

	-- pad the end dates by 30 to allow a grace period for overlapping ranges.
	select PERSON_ID, DATEADD(day,30,DRUG_EXPOSURE_END_DATE), -1 as EVENT_TYPE, NULL
	FROM cteDrugTarget
	) RAWDATA
	) E
	WHERE (2 * E.START_ORDINAL) - E.OVERALL_ORD = 0
	)
	select d.DRUG_EXPOSURE_ID,
	d.PERSON_ID,
	d.DRUG_CONCEPT_ID,
	d.DRUG_EXPOSURE_START_DATE,
	d.DRUG_EXPOSURE_END_DATE,
	MIN(e.END_DATE) as ERA_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID,
	d.REFILLS,
	d.QUANTITY,
	d.DAYS_SUPPLY,
	d.RowNum
	FROM cteDrugTarget d
	JOIN cteEndDates e on d.PERSON_ID = e.PERSON_ID and e.END_DATE >= d.DRUG_EXPOSURE_START_DATE
	GROUP BY d.DRUG_EXPOSURE_ID,
	d.PERSON_ID,
	d.DRUG_CONCEPT_ID,
	d.DRUG_EXPOSURE_START_DATE,
	d.DRUG_EXPOSURE_END_DATE,
	d.DRUG_TYPE_CONCEPT_ID,
	d.REFILLS,
	d.QUANTITY,
	d.DAYS_SUPPLY,
	d.RowNum
	;