Skip to content

Instantly share code, notes, and snippets.

@yoomlam
Last active October 4, 2020 20:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoomlam/485285c97e0a1039a8855a0c73431b59 to your computer and use it in GitHub Desktop.
Save yoomlam/485285c97e0a1039a8855a0c73431b59 to your computer and use it in GitHub Desktop.
Caseflow Reader adoption
# https://github.com/department-of-veterans-affairs/caseflow/issues/12556
# US fiscal year
span=Time.new(2019,10)..Time.new(2020,10)-1
## find id of first DocumentView record for each month
def find_first_record_for(init_id, first_hours)
curr_id=init_id
dv_first=nil
until dv_first do
return nil if DocumentView.find(curr_id).first_viewed_at > first_hours.end # need to increase range of first_hours
dv_first=DocumentView.where(id: curr_id..(curr_id+1_000_000)) .where(first_viewed_at: first_hours).order(:id).first
curr_id=curr_id+1_000_000
pp curr_id, first_hours
end
dv_first
end
month=span.begin.beginning_of_month
curr_id=40_000_000
monthly_ids={}
while month < span.end do
first_hours=month..month+6.hour
dv_first=find_first_record_for(curr_id, first_hours)
monthly_ids[month] = dv_first.id
pp monthly_ids
curr_id = dv_first.id
month=month.next_month
end
monthly_ids
{2019-10-01 00:00:00 -0400=>48034519,
2019-11-01 00:00:00 -0400=>50061717,
2019-12-01 00:00:00 -0500=>52019658,
2020-01-01 00:00:00 -0500=>53970029,
2020-02-01 00:00:00 -0500=>56218777,
2020-03-01 00:00:00 -0500=>58418293,
2020-04-01 00:00:00 -0400=>60862816,
2020-05-01 00:00:00 -0400=>63086836,
2020-06-01 00:00:00 -0400=>65083340,
2020-07-01 00:00:00 -0400=>67220638,
2020-08-01 00:00:00 -0400=>69209253,
2020-09-01 00:00:00 -0400=>71089325}
# Double-check first and last DocumentView dates (in UTC) for each month
month=span.begin.beginning_of_month
dv_first_last=[]
dv_first_last_ids=[]
while month < span.end do
dv_first=DocumentView.find(monthly_ids[month])
dv_last=monthly_ids[month.next_month] ? DocumentView.find(monthly_ids[month.next_month]-1) : DocumentView.last
dv_first_last << [dv_first.created_at, dv_last.created_at]
dv_first_last_ids << [dv_first.id, dv_last.id]
month=month.next_month
end
dv_first_last
=> [[Tue, 01 Oct 2019 04:00:00 UTC +00:00, Fri, 01 Nov 2019 03:59:48 UTC +00:00],
[Fri, 01 Nov 2019 04:00:10 UTC +00:00, Sun, 01 Dec 2019 04:58:29 UTC +00:00],
[Sun, 01 Dec 2019 05:01:59 UTC +00:00, Wed, 01 Jan 2020 04:47:26 UTC +00:00],
[Wed, 01 Jan 2020 05:35:37 UTC +00:00, Sat, 01 Feb 2020 04:59:18 UTC +00:00],
[Sat, 01 Feb 2020 05:00:04 UTC +00:00, Sun, 01 Mar 2020 02:29:28 UTC +00:00],
[Sun, 01 Mar 2020 07:52:01 UTC +00:00, Wed, 01 Apr 2020 03:59:40 UTC +00:00],
[Wed, 01 Apr 2020 04:00:14 UTC +00:00, Fri, 01 May 2020 03:59:58 UTC +00:00],
[Fri, 01 May 2020 04:00:05 UTC +00:00, Mon, 01 Jun 2020 03:55:47 UTC +00:00],
[Mon, 01 Jun 2020 04:00:39 UTC +00:00, Wed, 01 Jul 2020 03:59:57 UTC +00:00],
[Wed, 01 Jul 2020 04:00:01 UTC +00:00, Sat, 01 Aug 2020 03:59:56 UTC +00:00],
[Sat, 01 Aug 2020 04:00:10 UTC +00:00, Tue, 01 Sep 2020 03:59:53 UTC +00:00],
[Tue, 01 Sep 2020 04:00:03 UTC +00:00, Tue, 22 Sep 2020 21:05:28 UTC +00:00]]
dv_first_last_ids
=> [[48034519, 50061716],
[50061717, 52019657],
[52019658, 53970028],
[53970029, 56218776],
[56218777, 58418292],
[58418293, 60862815],
[60862816, 63086835],
[63086836, 65083339],
[65083340, 67220637],
[67220638, 69209252],
[69209253, 71089324],
[71089325, 72682542]]
dv_first_last_ids
=> [[48034519, 50061716],
[50061717, 52019657],
[52019658, 53970028],
[53970029, 56218776],
[56218777, 58418292],
[58418293, 60862815],
[60862816, 63086835],
[63086836, 65083339],
[65083340, 67220637],
[67220638, 69209252],
[69209253, 71089324],
[71089325, 73204919]] # <= updated last ID
# Get tallies for each month
## reduce verbosity
ActiveRecord::Base.logger.level = :warn
## May take a while
tallies=dv_first_last_ids.map{|dv_first_id, dv_last_id|
counts=[]
tally={}
DocumentView.in_batches(of: 100_000, start: dv_first_id, finish: dv_last_id).each_with_index do |relation, batch_index|
puts "Processing relation batch ##{batch_index}"
result = relation.group(:user_id).count;
counts << result
tally.merge!(result){ |key,ov,tv| ov+tv }
pp tally
end;
[DocumentView.find(dv_first_id).created_at.beginning_of_month, tally]
};
# Compute metric
uniq_users=JudgeTeam.all.map(&:users).flatten.uniq;
uniq_users.count
tallies.map{|month, tally|
remaining=uniq_users.reject{|u| tally[u.id]&.>5};
# calculate percent of the JudgeTeam users who have used Reader
[month.strftime("%Y-%m"), remaining.count, "%0.2f" % (1.0 - remaining.count.to_f/uniq_users.count)]
}
=> [
["2019-10", 146, "0.85"],
["2019-11", 145, "0.85"],
["2019-12", 145, "0.85"],
["2020-01", 150, "0.85"],
["2020-02", 159, "0.84"],
["2020-03", 139, "0.86"],
["2020-04", 117, "0.88"],
["2020-05", 114, "0.89"],
["2020-06", 81, "0.92"],
["2020-07", 75, "0.92"],
["2020-08", 44, "0.96"],
["2020-09", 42, "0.96"]]
# How many users did not use Reader at all across these months?
remaining_users=uniq_users;
tallies.each{|month, tally| remaining_users-=uniq_users.select{|u| tally[u.id]&.>5} };
remaining_users.count
remaining_users.pluck(:id, :css_id)
WITH judge_teams AS (
SELECT "organizations"."id" FROM "organizations"
WHERE "organizations"."type" IN ('JudgeTeam') AND "organizations"."status" = 'active'
),
team_users AS (
SELECT DISTINCT "organizations_users"."user_id" FROM "organizations_users"
WHERE "organizations_users"."organization_id" IN (select * from judge_teams)
),
monthly_users AS (
SELECT DATE_TRUNC('year', created_at + INTERVAL '3 months') AS year, user_id
FROM "document_views"
WHERE "document_views"."id" BETWEEN 53970029 AND 73204919
AND user_id in (select * from team_users)
GROUP BY 1, 2
)
SELECT year, count(*) as unique_user_count, count(*)*100.0/(SELECT count(*) from team_users) as percent
FROM monthly_users
GROUP BY 1
ORDER BY 1
WITH judge_teams AS (
SELECT "organizations"."id" FROM "organizations"
WHERE "organizations"."type" IN ('JudgeTeam') AND "organizations"."status" = 'active'
),
team_users AS (
SELECT DISTINCT "organizations_users"."user_id" FROM "organizations_users"
WHERE "organizations_users"."organization_id" IN (select * from judge_teams)
),
monthly_users AS (
SELECT date_trunc('month',created_at) as month, user_id
FROM "document_views"
WHERE "document_views"."id" BETWEEN 48034501 AND 73204919 -- modify this according to the date range
AND user_id in (select * from team_users)
GROUP BY 1, 2
)
SELECT month, count(*) as unique_user_count, count(*)*100.0/(SELECT count(*) from team_users) as percent
FROM monthly_users
GROUP BY month
ORDER BY month
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment