Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@philerooski
Last active March 14, 2020 00:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philerooski/e357930a484c3b1190a58c0a75192daf to your computer and use it in GitHub Desktop.
Save philerooski/e357930a484c3b1190a58c0a75192daf to your computer and use it in GitHub Desktop.
# Remove every object that ls() reports in the current environment so the
# script starts from a clean workspace.
# NOTE(review): this also wipes the caller's objects when the file is source()d
# into an interactive session -- confirm that is intended.
rm(list=ls())
# Set the global stringsAsFactors option to FALSE for the rest of the session.
options(stringsAsFactors = F)
library(synapser)
library(tidyverse)
library(lubridate)
# Download the mPower engagement table from Synapse and parse its timestamps.
#
# Returns the table read from the Synapse file, with `createdOn` converted to
# a date-time and only rows created strictly before the moment of the fetch
# kept. `filter()` also drops rows whose comparison is NA, i.e. rows with a
# missing `createdOn`.
fetch_and_prep <- function() {
  mpower_f <- synGet("syn21213531") #"syn11488492")
  mpower <- read_tsv(mpower_f$path)
  # Named `fetched_at` rather than `now` so the local neither shadows
  # lubridate::now() nor is likely to collide with a column name inside
  # filter()'s data mask.
  fetched_at <- lubridate::now()
  # createdOn is divided by 1000 before as_datetime(); presumably the raw
  # values are epoch milliseconds -- TODO confirm against the source table.
  # The pipeline is the last expression, so its value is returned visibly
  # instead of relying on the invisible value of an assignment.
  mpower %>%
    mutate(createdOn = as_datetime(createdOn / 1000)) %>%
    filter(createdOn < fetched_at)
}
# Add per-participant elapsed-time indices to an engagement table.
#
# engagement: data frame with a `healthCode` column and a date-time
#   `createdOn` column.
#
# Returns `engagement` with two extra integer columns:
#   participantWeek - whole weeks elapsed since the participant's earliest
#                     `createdOn` (0 for the first week),
#   participantDay  - whole days elapsed since that same first record.
mutate_participant_week_day <- function(engagement) {
  # Earliest record per participant.
  first_activity <- engagement %>%
    group_by(healthCode) %>%
    summarise(first_activity_time = min(createdOn, na.rm = TRUE))
  engagement %>%
    # Join explicitly on healthCode: without `by`, inner_join() would join on
    # every column name the two tables happen to share (and print a message).
    inner_join(first_activity, by = "healthCode") %>%
    mutate(
      seconds_since_first_activity = createdOn - first_activity_time,
      # as.duration() normalises the difftime (whatever unit it picked) so the
      # conversion to weeks/days below is unit-safe.
      participantWeek = as.integer(
        floor(as.numeric(
          as.duration(seconds_since_first_activity), "weeks"))),
      participantDay = as.integer(
        floor(as.numeric(
          as.duration(seconds_since_first_activity), "days")))
    ) %>%
    # Drop the helper columns; only the two indices are kept.
    select(-first_activity_time, -seconds_since_first_activity)
}
# Label each engagement record with the kind of task that produced it.
#
# engagement: data frame with a `simpleName` column.
#
# Returns `engagement` with a character column `taskType` set to
# "active-sensor", "survey" or "passive-sensor"; names that appear in none of
# the lists below get NA.
mutate_task_type <- function(engagement) {
  tasks_by_type <- list(
    "active-sensor" = c("WalkingActivity", "TappingActivity", "MemoryActivity",
                        "VoiceActivity", "TremorActivity"),
    "survey" = c("PDQ8", "MDSUPDRS", "study_feedback", "my_thoughts",
                 "MedicationsSurvey", "MedicationTracker", "PHQ8",
                 "MoodSurvey", "EnrollmentSurvey",
                 "NonIdentifiableDemographicsTask", "mythoughts"),
    "passive-sensor" = c("HealthKitDataCollector", "HealthKitSleepCollector",
                         "HealthKitWorkoutCollector", "locationTracker",
                         "displacement", "displacementCollector",
                         "motionActivityCollector")
  )
  # Flatten the table into two parallel vectors: task name -> type label.
  known_names <- unlist(tasks_by_type, use.names = FALSE)
  known_types <- rep(names(tasks_by_type), times = lengths(tasks_by_type))
  # Wrapped in a closure so the lookup vectors are resolved lexically and
  # cannot be masked by same-named columns of `engagement`.
  classify <- function(task) known_types[match(task, known_names)]
  engagement %>%
    mutate(taskType = classify(simpleName))
}
# Label each engagement record with how often its task is scheduled.
#
# engagement: data frame with a `simpleName` column.
#
# Returns `engagement` with a character column `taskFrequency` set to
# "daily", "monthly", "continuous", "intermittent" or "baseline"; names that
# appear in none of the lists below get NA.
mutate_task_frequency <- function(engagement) {
  tasks_by_frequency <- list(
    "daily" = c("WalkingActivity", "TappingActivity", "MemoryActivity",
                "VoiceActivity", "TremorActivity"),
    "monthly" = c("PDQ8", "MDSUPDRS"),
    "continuous" = c("HealthKitDataCollector", "HealthKitSleepCollector",
                     "HealthKitWorkoutCollector", "locationTracker",
                     "displacement", "displacementCollector",
                     "motionActivityCollector"),
    "intermittent" = c("study_feedback", "my_thoughts", "MedicationsSurvey",
                       "MedicationTracker", "PHQ8", "MoodSurvey",
                       "mythoughts"),
    "baseline" = c("EnrollmentSurvey", "NonIdentifiableDemographicsTask")
  )
  # Flatten the table into two parallel vectors: task name -> frequency label.
  known_names <- unlist(tasks_by_frequency, use.names = FALSE)
  known_frequencies <- rep(names(tasks_by_frequency),
                           times = lengths(tasks_by_frequency))
  # Wrapped in a closure so the lookup vectors are resolved lexically and
  # cannot be masked by same-named columns of `engagement`.
  classify <- function(task) known_frequencies[match(task, known_names)]
  engagement %>%
    mutate(taskFrequency = classify(simpleName))
}
# Attach each participant's UTC offset and derive local-time columns.
#
# engagement: data frame with `healthCode` and a date-time `createdOn`.
#
# Returns `engagement` left-joined to the Synapse healthCode -> offset table,
# with:
#   createdOnLocalTime - `createdOn` shifted by `UTC_offset` hours, as text,
#   createdOnTimeZone  - `UTC_offset` multiplied by 100,
#   createdOn          - converted to text.
# The joined `actual_state` and `UTC_offset` columns are dropped afterwards.
mutate_local_time <- function(engagement) {
  offset_file <- synGet("syn12635875")
  healthcode_to_utc <- read_tsv(offset_file$path)
  with_offsets <- left_join(engagement, healthcode_to_utc, by = "healthCode")
  localized <- mutate(
    with_offsets,
    # Must be computed before createdOn is turned into text below.
    createdOnLocalTime = as.character(createdOn + lubridate::hours(UTC_offset)),
    createdOnTimeZone = UTC_offset*100,
    createdOn = as.character(createdOn)
  )
  select(localized, -actual_state, -UTC_offset)
}
# Build the curated mPower participant metadata table.
#
# Takes no arguments; downloads its inputs from Synapse (syn15624955,
# syn8533708, syn12555309) and queries the table syn3420237.
#
# Steps, in order:
#   1. read the metadata, add an age_group bin and drop unused columns
#   2. remove health codes whose dataGroups mark them as test/ambiguous users
#   3. flag clinically referred participants (two cohorts) in clinicalReferral
#   4. recode race, employment and education into coarser categories
#   5. keep only rows with a gender of Female / Male / Prefer not to answer
#
# Returns the curated data frame.
curate_mpower_metadata <- function() {
# Read the metadata as a plain data.frame and bin `age` with cut() using the
# breaks 17, 29, 39, 49, 59, 120.
df <- data.table::fread(synGet("syn15624955")$path, data.table = F) %>%
dplyr::mutate(age_group = cut(age, breaks=c(17,29,39,49, 59, 120)))
# Drop the columns that are not carried into the curated output.
df <- df %>% dplyr::select(-health_history, -healthcare_provider, -last_smoked_timezone,
-SourceID, -diagnosis_year, -medication_start_year, -onset_year, -professional_diagnosis,
-when_deep_brain_stimulation, -packs_per_day, -years_smoking,
-are_caretaker, -deep_brain_stimulation, -medical_usage, -medical_usage_yesterday,
-home_usage, -smoked, -surgery, -living_alone_status, -medication_bool,
-past_participation, -video_usage, -phone_usage, -recordId)
# Remove test users: any healthCode with a dataGroups value containing "test",
# "parkinson;control" or "control;parkinson".
testHealthCodes <- df %>% dplyr::filter(str_detect(dataGroups, 'test|parkinson;control|control;parkinson')) %>% .$healthCode %>% unique()
# Filter out every row belonging to those healthCodes.
df <- df %>% dplyr::filter(! healthCode %in% testHealthCodes)
## Create a column for individuals that are clinically referred
## Cohort 1 - ObjectivePD
# The cohort file spells the key `healthcode`; rename it to match `df`.
objectivePD_cohort <- data.table::fread(synGet("syn8533708")$path) %>% dplyr::rename(healthCode = healthcode)
objectivePD_cohort_mdata <- data.table::fread(synGet("syn12555309")$path, data.table = F)
# Keep only the key and the self-reported diagnosis answer, de-duplicated, and
# move the answer into a column named clinicalDiagnosis.
objectivePD_cohort_mdata <- objectivePD_cohort_mdata[, c('healthCode', "Do you have Parkinson disease?")]
objectivePD_cohort_mdata <- objectivePD_cohort_mdata %>% distinct()
objectivePD_cohort_mdata['clinicalDiagnosis'] = objectivePD_cohort_mdata$`Do you have Parkinson disease?`
objectivePD_cohort_mdata$`Do you have Parkinson disease?` <- NULL
# Clinical referral status + corrected case status: full outer merge on
# healthCode, keep rows that have a diagnosis answer, drop `id`, turn the
# answer into a logical ('Yes' -> TRUE, anything else -> FALSE) and mark the
# whole cohort as clinically referred.
objectivePD_cohort <- merge(objectivePD_cohort, objectivePD_cohort_mdata, all=T, by='healthCode') %>%
dplyr::filter(!is.na(clinicalDiagnosis)) %>%
dplyr::select(-id) %>%
dplyr::mutate(clinicalDiagnosis = ifelse(clinicalDiagnosis == 'Yes', T, F),
clinicalReferral = T)
# Merge with metadata (full outer join). No `by` is given, so merge() joins on
# every column name the two tables share.
# NOTE(review): presumably only healthCode is shared -- confirm.
df <- merge(df,objectivePD_cohort, all=T)
# For the objectivePD cohort, replace inferred_diagnosis with the
# clinicalDiagnosis taken from the curated data, then drop the helper column.
to.replace = !is.na(df$clinicalDiagnosis)
df$inferred_diagnosis[to.replace] = df$clinicalDiagnosis[to.replace]
df$clinicalDiagnosis <- NULL
## Cohort 2
## The clinical referrals is the intersection between that file and the query:
### Github ref - https://github.com/Sage-Bionetworks/mhealth-engagement-analysis/issues/6
# Health codes with a non-empty externalId whose dataGroups do not contain
# "test", excluding anyone already in the ObjectivePD cohort.
otherClinicalReferrals <- synTableQuery("SELECT * FROM syn3420237 where externalId<>'' and dataGroups not like '%test%'")
otherClinicalReferrals <- otherClinicalReferrals$asDataFrame() %>% .$healthCode %>% unique()
otherClinicalReferrals <- setdiff(otherClinicalReferrals, objectivePD_cohort$healthCode)
# Select only those that are present in df with inferred_diagnosis == TRUE.
otherClinicalReferrals <- df %>%
filter(healthCode %in% otherClinicalReferrals & inferred_diagnosis == T ) %>% .$healthCode
df$clinicalReferral[df$healthCode %in% otherClinicalReferrals] = T
### Fill FALSE for all others, who are not clinical referrals
df$clinicalReferral[is.na(df$clinicalReferral)] = F
# Rename Enter_State to state and title-case its values.
df <- df %>% dplyr::rename(state = Enter_State) %>%
dplyr::mutate(state = stringr::str_to_title(state)) %>%
dplyr::select(-dataGroups) # remove dataGroups - pretty confusing col at this point with so much missing
# Fix Race (the order of these replacements is important): each step
# overwrites the whole value wherever its pattern matches, so later patterns
# are tested against the results of earlier replacements.
torep <- grepl('Latino/Hispanic', df$race)
df$race[torep] = 'Hispanic/Latinos'
torep <- grepl('Black or African', df$race)
df$race[torep] = 'African-American/Black'
torep <- grepl('Native American', df$race)
df$race[torep] = 'AIAN'
torep <- grepl('Asian', df$race)
df$race[torep] = 'Asian'
torep <- grepl('White or Caucasian', df$race)
df$race[torep] = 'Non-Hispanic White'
torep <- grepl('Other|Middle Eastern|Caribbean', df$race, perl=T)
df$race[torep] = 'Other'
torep <- grepl('Mixed', df$race, perl=T)
df$race[torep] = 'More than one'
torep <- grepl('Pacific Islander', df$race, perl=T)
df$race[torep] = 'Native Hawaiian or other Pacific Islander'
#### Employment
# Same sequential overwrite scheme as for race: a value matching several
# patterns ends up with the label of the last pattern that still matches.
torep <- grepl('Self-employed|full time|Employment for wages|Military', df$employment)
df$employment[torep] = 'employed'
torep <- grepl('Unable to work', df$employment)
df$employment[torep] = 'Unable to work'
torep <- grepl('Retired', df$employment)
df$employment[torep] = 'Retired'
torep <- grepl('Out of work|no work', df$employment)
df$employment[torep] = 'unemployed'
torep <- grepl('student', df$employment)
df$employment[torep] = 'student'
torep <- grepl('occasional|part time', df$employment)
df$employment[torep] = 'part-time'
# Expose the (possibly corrected) diagnosis as caseStatus and tag the study.
df <- df %>% dplyr::rename(caseStatus = inferred_diagnosis) %>%
dplyr::mutate(study = 'mPower')
# Update Education: case_when() uses the first matching rule, so the exact
# %in% lists take precedence over the broader case-insensitive patterns that
# follow them; values matching nothing are kept unchanged.
df <- df %>%
dplyr::mutate(education = case_when(
grepl('Post graduate|Doctoral Degree|Master', education, perl=T, ignore.case = T) ~ 'Post graduate',
education %in% c('8th grade or less', 'More than 8th grade but did not graduate high school') ~ 'Below High School',
education %in% c('Some college', 'Graduate of Four Year College', 'Graduate of Two Year College or Technical School', 'Some graduate school') ~ 'College',
education %in% c('High school graduate or equivalent', 'Some high school') ~ 'High School',
grepl('College', education, perl=T, ignore.case = T) ~ 'College',
grepl('high school', education, perl=T, ignore.case = T) ~ 'High School',
TRUE ~ education))
# Filter out unclear gender: keep only the three listed answers (rows with a
# missing gender are dropped as well, since NA is not in the list).
df <- df %>% filter(gender %in% c('Female', 'Male', 'Prefer not to answer'))
return(df)
}
# Entry point: curate the mPower engagement table and upload it to Synapse.
#
# Logs in, builds the engagement table by chaining the fetch step with the
# week/day, task type, task frequency and local time annotations, writes it to
# a temporary TSV in the working directory, stores that file on Synapse and
# then removes the local copy.
main <- function() {
  synLogin()
  #Meta Data
  #mpower_metadata <- curate_mpower_metadata()
  #write_csv(mpower_metadata, "mpower_metadata.csv")
  #f_meta <- synapser::File("mpower_metadata.csv", parent="syn19520775")
  #synStore(f_meta, used=list("syn15624955"))
  #unlink("mpower_metadata.csv")
  #mPower engagement
  mpower <- fetch_and_prep() %>%
    mutate_participant_week_day() %>%
    mutate_task_type() %>%
    mutate_task_frequency() %>%
    mutate_local_time()
  ###Keep only those healthCode that have entry in the metaData file
  #mpower <- mpower %>%
  # dplyr::filter(healthCode %in% .data = mpower_metadata$healthCode)
  # Single source of truth for the local file name, so the file that is
  # written, uploaded and cleaned up is always the same one. (Previously the
  # cleanup targeted "mpower_engagement.tsv", which is never created, leaving
  # the curated TSV behind on disk.)
  output_path <- "mpower_engagement_curated.tsv"
  write_tsv(mpower, output_path)
  f <- synapser::File(output_path, parent="syn21054482") #syn18936398")
  # NOTE(review): `used` is the same id as the upload parent, while the data is
  # fetched from syn21213531 -- confirm the intended provenance source.
  synStore(f, used=list("syn21054482"), #syn11488492"),
           executed=paste0("https://gist.github.com/philerooski/",
                           "e357930a484c3b1190a58c0a75192daf"))
  unlink(output_path)
}
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment