Skip to content

Instantly share code, notes, and snippets.

@cbare
Last active January 29, 2018 19:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cbare/8370e56ebe03f1f02a779845c4855346 to your computer and use it in GitHub Desktop.
Save cbare/8370e56ebe03f1f02a779845c4855346 to your computer and use it in GitHub Desktop.
Travels in the Tidyverse: R code to count reviews by external physicians
# compile stats on external clinical reviews
library('RPostgreSQL')
library('dplyr')
library('lubridate')
library('tidyr')
# Get external reviews with UTC timestamps from the DB
# \copy (select distinct o.observation_id, o.collection_date, o.vendor_id, v.name, reviewed_at_utc, reviewed_by from ops.review r join analysis.observation o on r.observation_id=o.observation_id join analysis.vendor v on o.vendor_id=v.vendor_id where reviewed_at_utc >= '2017-05-01' order by reviewed_at_utc) TO '/Users/cbare/Desktop/external-reviews.tsv' WITH NULL AS '';
# get a table of addresses from user-management
env.vars <- Sys.getenv(c('DB_USER', 'DB_PW', 'DB_HOST'))
con <- dbConnect(PostgreSQL(),
user=env.vars['DB_USER'],
password=env.vars['DB_PW'],
host=env.vars['DB_HOST'],
dbname="user-management")
q <- paste(
"select u.public_client_id, a.city, a.region, a.postal_code, a.country",
"from users.user u",
"join users.address a on u.shipping_address_id=a.address_id",
"where u.public_client_id not like '99%'",
"order by public_client_id;")
rs <- dbSendQuery(con, q)
addresses <- fetch(rs, n=-1)
dbDisconnect(con)
# get reviews
con <- dbConnect(PostgreSQL(),
user=env.vars['DB_USER'],
password=env.vars['DB_PW'],
host=env.vars['DB_HOST'],
dbname="arivale")
q <- paste(
"select distinct o.observation_id, o.collection_date, c.public_client_id, o.vendor_id, v.name, reviewed_at_utc, reviewed_by",
"from ops.review r",
"join analysis.observation o on r.observation_id=o.observation_id",
"join analysis.client c on o.client_id=c.client_id",
"join analysis.vendor v on o.vendor_id=v.vendor_id",
"where reviewed_at_utc >= '2017-05-01'",
"order by reviewed_at_utc")
rs <- dbSendQuery(con, q)
df <- fetch(rs, n=-1)
dbDisconnect(con)
# merge reviews with address info yielding a data.frame with addresses
dfwa <- left_join(df, addresses, by='public_client_id')
# convert timestamps to pacific time
dfwa <- dfwa %>% mutate( reviewed_at_pacific_time=with_tz(as_datetime(reviewed_at_utc), 'America/Los_angeles') )
# trim rows prior to may 1st pacific time
dfwa <- dfwa[ dfwa$reviewed_at_pacific_time >= as_datetime('2017-05-01 00:00:00 -0800'), ]
# we going to group by date, week or month
dfwa <- dfwa %>% mutate( pacific_date=as_date(reviewed_at_pacific_time) )
dfwa <- dfwa %>% mutate( pacific_week=week(pacific_date)+52*(year(pacific_date)-2017) )
dfwa <- dfwa %>% mutate( pacific_month=month(pacific_date)+12*(year(pacific_date)-2017) )
# counts will count reviews performed by each physician on each date for each vendor
counts <- dfwa %>% group_by( name, pacific_date, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, region, pacific_date)
counts <- dfwa %>% group_by( name, pacific_week, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, pacific_week, region)
write.table(counts, '/Users/cbare/Desktop/external-review-counts-by-week.tsv', sep='\t', row.names=F )
counts <- dfwa %>% group_by( name, pacific_month, reviewed_by, region ) %>% summarise( count=n() ) %>% spread(name, count, fill=0L) %>% arrange(reviewed_by, pacific_month, region)
write.table(counts, '/Users/cbare/Desktop/external-review-counts-by-month.tsv', sep='\t', row.names=F )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment