Skip to content

Instantly share code, notes, and snippets.

@sheldonhull
Last active March 12, 2019 05:52
Show Gist options
  • Save sheldonhull/8c7235ecb75bb91833e1 to your computer and use it in GitHub Desktop.
Save sheldonhull/8c7235ecb75bb91833e1 to your computer and use it in GitHub Desktop.
Eliminate Overlapping Dates, with subgroupings (such as overlap only eliminated at process > task level, instead of just process level)
if object_id('tempdb..#Awesome') is not null
drop table #awesome;
create table #awesome
(
unique_join_k int
,process varchar(10)
,start_date date
,end_date date
,grouper_id as convert(uniqueidentifier, hashbytes('sha1',
isnull(cast(unique_join_k as varchar(max)), '|')
+ '|' + isnull(cast(process as varchar(max)), '|')
))
)
insert into #awesome
select
*
from
(
values
(1000, 'process 1', cast('20131006' as date), cast('20131213' as date)),
(1000, 'process 1', cast('20131225' as date), cast('20140215' as date)),
(1000, 'process 1', cast('20140123' as date), cast('20140215' as date)),
(1000, 'process 2', cast('20140328' as date), cast('20140617' as date)),
(1001, 'process 1', cast('20130214' as date), cast('20130421' as date)),
(1001, 'process 1', cast('20130827' as date), cast('20130926' as date)),
(1001, 'process 1', cast('20131211' as date), cast('20131221' as date)),
(1001, 'process 1', cast('20140227' as date), cast('20140405' as date)),
(1002, 'process 1', cast('20130518' as date), cast('20130622' as date)),
(1002, 'process 2', cast('20130527' as date), cast('20130618' as date)),
(1003, 'process 1', cast('20130312' as date), cast('20130428' as date)),
(1003, 'process 1', cast('20130510' as date), cast('20130614' as date)),
(1003, 'process 1', cast('20130725' as date), cast('20131115' as date)),
(1003, 'process 1', cast('20131010' as date), cast('20131014' as date)),
(1003, 'process 2', cast('20140413' as date), cast('20140614' as date)),
(1004, 'process 1', cast('20130116' as date), cast('20130425' as date)),
(1004, 'process 1', cast('20130828' as date), cast('20130926' as date)),
(1004, 'process 1', cast('20140101' as date), cast('20140210' as date)),
(1004, 'process 1', cast('20140312' as date), cast('20140408' as date)),
(1004, 'process 2', cast('20140513' as date), cast('20140806' as date)),
(1004, 'process 2', cast('20140515' as date), cast('20140818' as date)),
(1005, 'process 1', cast('20130606' as date), cast('20130724' as date)),
(1005, 'process 2', cast('20131125' as date), cast('20140217' as date)),
(1005, 'process 3', cast('20131220' as date), cast('20140131' as date)),
(1005, 'process 3', cast('20140503' as date), cast('20140605' as date))) as vtable (unique_join_k, process, start_date, end_date);
select
*
from
#awesome as a
order by
a.unique_join_k
,a.[start_date];
with c1 as -- let e = end ordinals, let s = start ordinals
(
select
grouper_id
,unique_join_k
,time_start = start_date
,is_start = 1
,sqid_end = null
,sqid_start = row_number() over (partition by grouper_id order by start_date)
from
#awesome cs
union all
select
grouper_id
,unique_join_k
,time_start = end_date
,is_start = 0
,sqid_end = row_number() over (partition by grouper_id order by end_date)
,sqid_start = null
from
#awesome cs
),
c2 as (-- let se = start or end ordinal, namely, how many events (start or end) happened so far
select
c1.*
,sqid_all_is_starts = row_number() over (partition by grouper_id order by time_start, is_start desc)
from
c1
), --select * from c2
c3 as (
/*******************************************************
For start events, the expression sqid_start - (sqid_all_is_starts - sqid_start) - 1 represents how many sessions were active
just before the current (hence - 1)
For end events, the expression (sqid_all_is_starts - sqid_end) - sqid_end ) represents how many sessions are active
right after this one
The above two expressions are 0 exactly when a group of packed intervals
either starts or ends, respectively
After filtering only events when a group of packed intervals either starts or ends,
group each pair of adjacent start/end events
*******************************************************/
select
grouper_id
,unique_join_k
,time_start
,date_grouping = floor((row_number() over (partition by grouper_id order by time_start) - 1) / 2 + 1)
from
c2
where
coalesce(sqid_start - (sqid_all_is_starts - sqid_start) - 1,
(sqid_all_is_starts - sqid_end) - sqid_end) = 0
) -- select * from c3
/*******************************************************
OVERLAPPING DATES ON SAME GROUP RESOLVED IN DISTINCT RANGES
*******************************************************/
,
overlappedgone as (
select
*
,sqid = dense_rank() over (partition by grouper_id, date_grouping order by time_start asc)
from
c3
)
select
unique_join_k_start = og1.unique_join_k
,unique_join_k_end = og2.unique_join_k
,calc_start_date = og1.time_start
,calc_end_date = og2.time_start
,unique_non_overlapping_days = datediff(day, og1.time_start, og2.time_start)
,grouper_id = og1.grouper_id
from
overlappedgone og1
inner join overlappedgone og2
on og1.date_grouping = og2.date_grouping
and og1.grouper_id = og2.grouper_id
and og2.sqid = 2
where
og1.sqid = 1
order by
og1.unique_join_k
,calc_start_date
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment