Skip to content

Instantly share code, notes, and snippets.

@philerooski
Created April 22, 2020 20:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philerooski/5e3170fab462bb5f662131f9e45464d9 to your computer and use it in GitHub Desktop.
Save philerooski/5e3170fab462bb5f662131f9e45464d9 to your computer and use it in GitHub Desktop.
# Entry function to extract tremor features
######################
# Load Libraries
######################
library(optparse)
library(data.table)
library(plyr)
library(tidyverse)
library(jsonlite)
library(uuid)
library(lubridate)
library(synapser)
library(mpowertools)
# devtools::install_github("Sage-Bionetworks/mpowertools")
# Index of this job within the batch array, read from the environment as a
# string. It is used further down to pick this job's row (uid) in the job table.
batch_job_number <- Sys.getenv("AWS_BATCH_JOB_ARRAY_INDEX")
# Log in to Synapse with the username/password supplied via the environment.
synLogin(
  Sys.getenv("synapseUsername"),
  Sys.getenv("synapsePassword")
)
###########################
# Synapse I/O parameters
###########################
# Build the command-line parser for this script and parse the arguments the
# script was invoked with.
#
# Recognised flags: --jobTable, --inputTable, --assayName, --assayColumn and
# --outputPath (all of type character).
#
# Returns: whatever optparse's parse_args() yields for the parser built here.
readArgs <- function() {
  cli <- OptionParser(description='Extract tremor features.') %>%
    add_option(
      "--jobTable",
      type="character",
      help="Synapse Table containing the record IDs to be processed."
    ) %>%
    add_option(
      "--inputTable",
      type="character",
      help="Synapse Table containing columns with raw features to extract."
    ) %>%
    add_option(
      "--assayName",
      type="character",
      help="Name of assay being processed"
    ) %>%
    add_option(
      "--assayColumn",
      type="character",
      help="Name of column with assay json files"
    ) %>%
    add_option(
      "--outputPath",
      type="character",
      help="Directory to write results to."
    )
  parse_args(cli)
}
# Set input parameters
args <- readArgs()

# Fail fast on incomplete invocations. Without these checks a missing
# --outputPath makes the output directory resolve to "/<assayName>", a missing
# --assayName leaves the feature table undefined, and an unset job index
# produces a malformed Synapse query.
required.options <- c("jobTable", "inputTable", "assayName",
                      "assayColumn", "outputPath")
missing.options <- required.options[
  vapply(required.options,
         function(opt) is.null(args[[opt]]),
         logical(1))]
if (length(missing.options) > 0) {
  stop("Missing required option(s): ",
       paste0("--", missing.options, collapse=", "),
       call.=FALSE)
}
if (!nzchar(batch_job_number)) {
  stop("Environment variable AWS_BATCH_JOB_ARRAY_INDEX is not set.",
       call.=FALSE)
}

# Look up this job's row in the job table; its jobInfo column holds a JSON
# document whose recordIds field lists the records to process.
jobTable <- synTableQuery(paste0("select jobInfo from ", args$jobTable,
                                 " where uid = ", batch_job_number))$asDataFrame()
if (nrow(jobTable) == 0) {
  stop("No row with uid = ", batch_job_number,
       " found in job table ", args$jobTable, ".",
       call.=FALSE)
}
rid.to.extract <- fromJSON(jobTable$jobInfo)$recordIds

# Get source tremor table from synapse
# Record ids are rendered as a quoted, comma-separated SQL list: ('a', 'b').
rid.to.extract.str.list <- paste0("('",
                                  paste(rid.to.extract, collapse="', '"),
                                  "')")
query <- paste("select * from", args$inputTable,
               "where recordId in", rid.to.extract.str.list)
print(paste("Querying", query))
tremor.table <- synTableQuery(query)
tremor.table.values <- tremor.table$asDataFrame()
########################
# Feature extraction
########################
# Download tremor json files
print(paste("Downloading json files for", args$assayName))
if (nrow(tremor.table.values) > 0) {
  # Download the files referenced by column `col.name` of `tremorTable` and
  # join their local paths back onto the queried table values.
  #
  # col.name:    name of the table column holding the json file references.
  # tremorTable: the Synapse table query result to download from.
  #
  # Returns: the downloaded-file table left-joined with tremor.table.values
  #   on `col.name`; the file path column is named after `col.name` with
  #   '.items' replaced by '.fileLocation'.
  get.tremor.json.loc <- function(col.name, tremorTable) {
    location.col <- gsub('.items', '.fileLocation', col.name)
    # Work on a function-local copy with the key column coerced to character
    # so it can be joined against the ids produced by rbindlist below.
    record.values <- tremor.table.values
    record.values[col.name] <- as.character(record.values[,col.name])
    # NOTE(review): presumably a named collection of local file paths keyed
    # by file handle id -- confirm against synapser.
    downloaded <- synDownloadTableColumns(tremorTable, col.name)
    per.file <- lapply(downloaded, function(path) data.frame(V1=path))
    # The names of `per.file` become the `col.name` column of the result.
    stacked <- rbindlist(per.file, idcol=col.name)
    stacked <- plyr::rename(stacked, c('V1'=location.col))
    dplyr::left_join(stacked, record.values, by=col.name)
  }
  tremor.json.loc <- get.tremor.json.loc(args$assayColumn, tremor.table)
} else {
  # Nothing matched the query: use an empty frame so later steps are skipped.
  tremor.json.loc <- data.frame()
}
## Set input parameters
# NOTE(review): none of the constants below are referenced anywhere else in
# the visible part of this script -- they look like leftovers from an earlier
# version; confirm before relying on them.
WORKING_DIR <- "/mPower_efs/mPowerAnalysis/featureExtraction/"
LIBRARY_DIRECTORY <- "/work/mylibs/"
CACHE_DIR <- "/mPower_efs/.synapseCache/"
# mPower public tremor table from Sage-internal project
TREMOR_TBL <- "syn10676309"
# files containing the json file location in /mPower_efs
TREMOR_FILE_LOC_ID <- "syn10916227"
# Destination directory in synapse
FEATURES_PARENT_ID <- "syn8362851"
UPDATE <- FALSE
n <- 1000

# Per-assay settings.
#   FEATURES_ID  : destination file id where features are to be stored
#   COLUMN_NAME  : source column name of synapse table to obtain json files
#   COLUMN_NAME1 : comparison column name of synapse table to obtain json files
#   FNAME        : features file name (hand-to-nose assays only)
PARAM <- list(
  handInLapLeft = list(
    FEATURES_ID = "syn11597056",
    COLUMN_NAME = "deviceMotion_tremor_handInLap_left.json.items",
    COLUMN_NAME1 = "deviceMotion_tremor_handInLap_right.json.items"
  ),
  handInLapRight = list(
    FEATURES_ID = "syn11597058",
    COLUMN_NAME = "deviceMotion_tremor_handInLap_right.json.items",
    COLUMN_NAME1 = "deviceMotion_tremor_handInLap_left.json.items"
  ),
  handAtShoulderLengthLeft = list(
    FEATURES_ID = "syn11597059",
    COLUMN_NAME = "deviceMotion_tremor_handAtShoulderLength_left.json.items",
    COLUMN_NAME1 = "deviceMotion_tremor_handAtShoulderLength_right.json.items"
  ),
  handAtShoulderLengthRight = list(
    FEATURES_ID = "syn11597060",
    COLUMN_NAME = "deviceMotion_tremor_handAtShoulderLength_right.json.items",
    COLUMN_NAME1 = "deviceMotion_tremor_handAtShoulderLength_left.json.items"
  ),
  handToNoseLeft = list(
    FEATURES_ID = "syn11597061",
    FNAME = "handToNoseLeftTremorFeatures.tsv",
    COLUMN_NAME = "deviceMotion_tremor_handToNose_left.json.items"
  ),
  handToNoseRight = list(
    FEATURES_ID = "syn11597062",
    FNAME = "handToNoseRightTremorFeatures.tsv",
    COLUMN_NAME = "deviceMotion_tremor_handToNose_right.json.items"
  )
)
#################
# Github commit
#################
# Resolve permalinks to this script and to the mpowertools feature-extraction
# sources. getRepo()/getPermlink() are provided by the githubr package, which
# this script never attaches, so calling them unqualified stops the script
# with "could not find function". The links are not used anywhere else in the
# visible part of this script, so the lookup is best-effort: when githubr is
# missing or the lookup fails, a warning is raised and unresolved links stay
# NULL.
thisFileName <- 'tremorModule.R'
thisRepo <- NULL
thisFile <- NULL
thisFile1 <- NULL
thisFile2 <- NULL
thisFile3 <- NULL
if (requireNamespace("githubr", quietly=TRUE)) {
  tryCatch({
    thisRepo <- githubr::getRepo(repository="th1vairam/mPowerAnalysis",
                                 ref="branch", refName='tremor')
    thisFile <- githubr::getPermlink(
      repository=thisRepo,
      repositoryPath=paste0('featureExtraction/', thisFileName))
    # thisRepo is deliberately re-pointed at the mpowertools repository for
    # the remaining three links, as in the original sequence.
    thisRepo <- githubr::getRepo(repository="th1vairam/mpowertools",
                                 ref="branch", refName='tremor_dev')
    thisFile1 <- githubr::getPermlink(
      repository=thisRepo,
      repositoryPath=paste0('R/', 'getTremorFeatures.R'))
    thisFile2 <- githubr::getPermlink(
      repository=thisRepo,
      repositoryPath=paste0('R/', 'getKineticTremorFeatures.R'))
    thisFile3 <- githubr::getPermlink(
      repository=thisRepo,
      repositoryPath=paste0('R/', 'getComparativeTremorFeatures.R'))
  }, error=function(e) {
    warning("Could not resolve GitHub permalinks: ", conditionMessage(e),
            call.=FALSE)
  })
} else {
  warning("Package 'githubr' is not installed; skipping GitHub permalinks.",
          call.=FALSE)
}
##############################
# Extract features
##############################
# For every record with a downloaded json file, run the mpowertools extractor
# matching the assay, stack the per-record results and write them as one
# uniquely named TSV under <outputPath>/<assayName>/.
#
# An earlier revision re-downloaded the json files at this point using
# objects that no longer exist in this script (`assay`, `para`, and the
# `@values` slot of the table object); that stale section aborted the run
# after the features had already been computed and has been removed.
print(paste("Extracting features for", args$assayName))
if (nrow(tremor.json.loc) != 0) {
  postural.assays <- c('handInLapLeft','handInLapRight',
                       'handAtShoulderLengthLeft', 'handAtShoulderLengthRight')
  kinetic.assays <- c('handToNoseLeft','handToNoseRight')

  # Pick the extractor once instead of duplicating the whole pipeline.
  if (any(args$assayName %in% postural.assays)) {
    extract.features <- mpowertools::getTremorFeatures
  } else if (any(args$assayName %in% kinetic.assays)) {
    extract.features <- mpowertools::getKineticTremorFeatures
  } else {
    # Previously this fell through and failed later with "object 'ftr' not
    # found"; report the real problem instead.
    stop("Unsupported assay name '",
         paste(args$assayName, collapse=", "),
         "'. Expected one of: ",
         paste(c(postural.assays, kinetic.assays), collapse=", "),
         call.=FALSE)
  }

  # Column holding the local path of each downloaded json file (same naming
  # rule as used when the files were downloaded).
  file.location.column <- gsub('.items', '.fileLocation', args$assayColumn)

  ftr <- tremor.json.loc %>%
    plyr::dlply(.(recordId),
                .fun=function(x) {
                  # Only the first file of each record is processed.
                  extract.features(x[[file.location.column]][1])
                }) %>%
    data.table::rbindlist(idcol='recordId', use.names=TRUE, fill=TRUE) %>%
    dplyr::mutate(Assay=args$assayName)

  parent.directory <- paste0(args$outputPath, "/", args$assayName)
  if (!dir.exists(parent.directory)) {
    # recursive=TRUE so a not-yet-existing outputPath is created as well.
    dir.create(parent.directory, recursive=TRUE)
  }
  # A fresh UUID per run keeps concurrent batch jobs from overwriting each
  # other's output.
  fpath <- paste0(parent.directory, "/", uuid::UUIDgenerate(), '.tsv')
  data.table::fwrite(ftr, file=fpath, append=FALSE,
                     row.names=FALSE, quote=FALSE, sep='\t')
}
print(paste('Finished', args$assayName, batch_job_number))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment