Skip to content

Instantly share code, notes, and snippets.

View skurscheid's full-sized avatar

Sebastian Kurscheid skurscheid

View GitHub Profile
@skurscheid
skurscheid / fasta_U_to_T.sh
Created June 1, 2019 00:00
Bash one-liner to convert RNA sequences to DNA sequences
# Extract the entries selected via `-r "$IDs"` from "$fastafile" with samtools faidx,
# replace every "U" with "T" on non-header lines (header lines start with ">" and are
# passed through untouched), and write the gzip-compressed result.
# All expansions are quoted so that paths containing spaces or glob characters are
# passed through intact.
# NOTE(review): the output is written to the variable named `fastafile_dna_space`;
# if "${fastafile}_dna_space" was intended, adjust the redirect target accordingly.
samtools faidx "$fastafile" -r "$IDs" | awk '$1 !~ /^ *>/ { gsub("U", "T") } { print }' | gzip -c > "$fastafile_dna_space"
@skurscheid
skurscheid / get_conda_packages.R
Last active April 24, 2020 22:37
Obtain names of R packages in a format suitable for installation via conda
# Build conda-style package names for the packages listed in `x$otherPkgs`.
#
# x: a list-like object with an `otherPkgs` element holding one named entry per
#    package (presumably the result of sessionInfo() -- confirm against callers).
#
# Returns an unnamed list with one string per package: the package name prefixed
# with "bioconductor-" when its entry has a "git_url" field, and "r-" otherwise.
condaPackages <- function(x) {
  attached <- x$otherPkgs
  lapply(names(attached), function(pkg) {
    has_git_url <- "git_url" %in% names(attached[[pkg]])
    prefix <- if (has_git_url) "bioconductor-" else "r-"
    paste0(prefix, pkg)
  })
}
@skurscheid
skurscheid / histonesInBioGrid.R
Last active March 12, 2019 23:49
Lookup histone genes in BioGRID database using data.table
library(data.table)
library(curl)
# Location of the BioGRID 3.5.170 "ALL" tab2 release archive and where to store it
biogrid_url <- "https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.170/BIOGRID-ALL-3.5.170.tab2.zip"
biogrid_zip <- "~/BIOGRID-ALL-3.5.170.tab2.zip"
# Fetch the archive with curl
curl::curl_download(url = biogrid_url, destfile = biogrid_zip)
# Extract the archive (no exdir is given, so unzip() uses its default target directory)
unzip(biogrid_zip)
# import data from local server into R Studio Server
@skurscheid
skurscheid / suppa_execution.sh
Created February 21, 2019 04:34
Example of preparing data for and running SUPPA
#!/usr/bin/env bash
# Pool the per-replicate abundance.tpm tables into one file per condition (WT and KO)
# using `suppa.py joinFiles`.
#
# Each continuation backslash is preceded by a space and the continued lines are
# indented. A backslash glued to the preceding word with an unindented next line
# makes the shell join the two tokens (e.g. "tpm--input-files"), which breaks the
# command line.

# Per-replicate input directory and pooled output directory shared by both calls
in_dir="RNA-Seq/suppa/GRCm38_ensembl93_ERCC/NB501086_0219_TSoboleva_JCSMR_RNAseq"
out_dir="RNA-Seq/suppa/pooled/GRCm38_ensembl93_ERCC/NB501086_0219_TSoboleva_JCSMR_RNAseq"

# Wild-type replicates
suppa.py joinFiles --file-extension tpm \
  --input-files \
    "${in_dir}/WT_18_38/abundance.tpm" \
    "${in_dir}/WT_37_39/abundance.tpm" \
    "${in_dir}/WT_46_47/abundance.tpm" \
  --output "${out_dir}/WT/abundance.tpm"

# Knock-out replicates
suppa.py joinFiles --file-extension tpm \
  --input-files \
    "${in_dir}/KO_19_26/abundance.tpm" \
    "${in_dir}/KO_24_25/abundance.tpm" \
    "${in_dir}/KO_44_45/abundance.tpm" \
  --output "${out_dir}/KO/abundance.tpm"
@skurscheid
skurscheid / RUVSeqScript.R
Created February 12, 2019 22:56
Runs RUVSeq analysis for removing "unwanted" variation using ERCC spike-ins
library(RUVSeq)
library(tximport)
library(data.table)
library(EDASeq)
library(RColorBrewer)
library(ggplot2)
library(sleuth)
library(GenomicRanges)
# comments: remove KO_19_26 & WT_46_47
@skurscheid
skurscheid / filter120.json
Created February 11, 2019 02:14
bamtools filter script
{
"insertSize" : "<=120"
}
@skurscheid
skurscheid / bamtools_inserSize.sh
Last active February 11, 2019 02:17
filter BAM file by insert size [using bamtools]
# Filter KO_01_K36me3.bam with the rules in filter120.json (passed via -script)
# and write the result to KO_01_K36me3.filtered.bam.
bamtools filter -in KO_01_K36me3.bam -script filter120.json -out KO_01_K36me3.filtered.bam
@skurscheid
skurscheid / necklace_diffSplice.R
Created February 7, 2019 23:53
Differential splicing analysis with edgeR (closely following the edgeR Users' Guide)
library(data.table)
library(edgeR)
# this part is just setting up local working directory and sample annotation information from config.json
pathPrefix <- "~"
if (!dir.exists(file.path(pathPrefix, "Data/Tremethick/Hodgkins-Lymphoma/RNA-Seq/necklace_results/R_analysis"))) {
dir.create("R_analysis")
setwd(file.path(pathPrefix, "Data/Tremethick/Hodgkins-Lymphoma/RNA-Seq/necklace_results/R_analysis"))
} else {
setwd(file.path(pathPrefix, "Data/Tremethick/H
@skurscheid
skurscheid / gist:6253b2ad9a047175673ff0d89a6a290e
Last active February 6, 2019 00:21
edgeR analysis from kallisto quantification, using tximport
library(rhdf5)
library(sleuth)
library(biomaRt)
library(tidyr)
library(rtracklayer)
library(BiocParallel)
library(tximport)
library(readr)
library(RUVSeq)
library(data.table)
@skurscheid
skurscheid / gist:5b8aca3bf4c33dc5584bfe7fea34889b
Created June 29, 2018 02:20
GNU parallel - redirect output per job to separate files
# Run one md5sum job per *.gz file in the current directory. The ">" is quoted so the
# calling shell does not treat it as a redirect of `parallel` itself; it is passed on
# as part of each job's command, with {} replaced by the input file name, so each
# job's output goes to <file>.md5.
parallel md5sum {} ">" {}.md5 ::: *.gz