Skip to content

Instantly share code, notes, and snippets.

@kdaily
Created April 15, 2020 19:23
Show Gist options
  • Save kdaily/ab901e9a90ed8ee3703bc5dc58847a09 to your computer and use it in GitHub Desktop.
Save kdaily/ab901e9a90ed8ee3703bc5dc58847a09 to your computer and use it in GitHub Desktop.
Add fastq-to-bam provenance to bam files (was used in BSMN)
# Attach the packages this script relies on. dplyr supplies `%>%` and the
# data-manipulation verbs (filter, group_by, mutate, select, ungroup,
# distinct) and tidyr supplies spread(); both are used further down but were
# never attached, so the script failed in a fresh session.
library(synapseClient)
library(dplyr)
library(tidyr)

# Log in to Synapse before issuing any queries.
synapseLogin()

# Synapse ID of the table to query (presumably a file view, going by the
# name of the resulting data frame -- confirm).
table_id <- "syn7871084"

# Query every column of the table and keep the `values` slot of the result.
fileview_df <- synTableQuery(sprintf('select * from %s', table_id))@values
# Build a table with one row per sample/experiment holding the comma-joined
# IDs of its fastq files and of its bam files in separate columns.
# Wrapped in local() so the intermediate steps do not leak into the
# global environment.
fastq2bam <- local({
  grouping_cols <- quote(c(sample_id_biorepository, sample_id_original,
                           experiment_id, fileFormat))

  # Only fastq and bam files are of interest.
  fastq_or_bam <- filter(fileview_df, fileFormat %in% c("fastq", "bam"))

  # Several files can share a file format within a sample/experiment, so
  # join all of their IDs into a single comma-separated string per group.
  with_joined_ids <- fastq_or_bam %>%
    group_by(sample_id_biorepository, sample_id_original,
             experiment_id, fileFormat) %>%
    mutate(ids = paste(id, collapse = ","))

  # Keep just the grouping fields plus the joined IDs (dropping the original
  # per-file id column) so that repeated rows collapse to one.
  one_row_per_format <- with_joined_ids %>%
    select(sample_id_biorepository, sample_id_original,
           experiment_id, fileFormat, ids) %>%
    ungroup() %>%
    distinct()

  # Move fastq and bam IDs into their own columns, then drop rows that are
  # missing either one.
  wide <- spread(one_row_per_format, fileFormat, ids)
  filter(wide, !(is.na(fastq) | is.na(bam)))
})
#' Record fastq-to-bam alignment provenance on a bam entity
#'
#' Fetches the bam entity (without downloading its file), sets its
#' `generatedBy` to an "Alignment" activity that used the given fastq IDs,
#' and stores the entity without forcing a new version.
#'
#' @param fastq Comma-separated string of IDs for the fastq inputs.
#' @param bam Comma-separated string of IDs for the bam output. Only a single
#'   ID is supported.
#' @return `NULL` when `bam` contains more than one ID; otherwise the result
#'   of `generatedBy()` on the stored entity.
addProvenance <- function(fastq, bam) {
  # Take the first (only) element of the split result: the split returns a
  # list with one element per input string, so checking the length of the
  # list itself would always see 1 and the guard below would never trigger.
  bam_ids <- strsplit(as.character(bam), ",", fixed = TRUE)[[1]]

  # Skip ambiguous cases where several bams map to the same fastq set.
  if (length(bam_ids) > 1) {
    return(NULL)
  }

  fastq_ids <- strsplit(as.character(fastq), ",", fixed = TRUE)[[1]]

  ent <- synGet(bam, downloadFile = FALSE)
  generatedBy(ent) <- Activity(
    name = "Alignment",
    description = "fastq to bam alignment",
    used = fastq_ids
  )
  generatedBy(synStore(ent, forceVersion = FALSE))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment