Skip to content

Instantly share code, notes, and snippets.

View mikelove's full-sized avatar

Michael Love mikelove

View GitHub Profile
@mikelove
mikelove / conditional_scz_gwas_2022_hg19.csv
Created August 10, 2023 15:43
Trubetskoy et al (2022) Supp Table 10b, conditionally independent associations (hg19)
We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 28 columns, instead of 7. in line 5.
Index CHR,Index SNP,BP Left,BP Right,# Conditional independent,Index BP,Index P value,Merged Indexes,2nd SNP,2nd BP,2nd P value,3rd SNP,3rd BP,3rd P value,4th SNP,4th BP,4th P value,5th SNP,5th BP,5th P value,,,,,,,,
6,rs13195636,24939493,30559493,4,27509493,3.33E-40,rs13195636,rs9461856,33395199,4.13E-14,rs8192589,32187637,2.27E-10,rs9368789,34001678,2.03E-07,rs10946808,26233387,1.11E-06,,,,,,,,
18,rs9636107,52666708,53680258,4,53200117,1.92E-13,"rs12969453,rs74914300,rs72934602,rs9636107",rs12969453,52751708,3.93E-11,rs78322266,53063676,6.89E-08,rs72934602,53568458,6.79E-07,rs9952704,53447690,2.60E-04,,,,,,,,
3,rs2710323,48132866,53603354,3,52815905,1.02E-21,"rs11715134,rs11917680,rs2710323,rs13080668",rs2236989,50505395,1.81E-10,rs7633840,48719638,4.41E-07,rs4687724,53408177,1.10E-04,,,,,,,,,,,
18,rs72980087,77324421,77981194,2,77632194,4.80E-16,"rs56040937,rs7238071,rs72980087,rs72970145",rs11664298,77578986,5.24E-16,rs12455965,77688830,0.0001109,,,,,,,,,,,,,,
3,rs13090130,161092491,161969035,2,161777035,
@mikelove
mikelove / join_overlap_distance.R
Created August 3, 2023 14:03
join_overlap_* but with distance as a metadata column
library(plyranges)
x <- data.frame(seqnames=1,
start=sample(1000,10,FALSE),
width=1, id=1:10) %>%
as_granges()
y <- data.frame(seqnames=1,
start=sample(1000,10,FALSE),
width=1, id=letters[1:10]) %>%
as_granges()
@mikelove
mikelove / many_groups.R
Created July 14, 2023 13:47
issues with summarizing over many groups in plyranges - related to Issue #30
library(plyranges)
library(microbenchmark)
library(dplyr)
library(tibble)
make_rand_gr <- function(N, grps) {
data.frame(seqnames = sample(c("seq1", "seq2", "seq3"), N, replace = TRUE),
strand = sample(c("+", "-", "*"), N, replace = TRUE), start = rpois(N,
N), width = rpois(N, N), grps = sample(grps, N, replace = TRUE),
score = runif(N)) %>% as_granges()
@mikelove
mikelove / bioc_meme.R
Last active July 12, 2023 09:04
Biocondutor meme text
airway@assays@data[["counts"]][airway@rowRanges@elementMetadata@listData$gene_id == "ENSG00000000003", airway@colData@listData$dex == "trt"]
assays(airway)[["counts"]][names(rowRanges(airway)) == "ENSG00000000003", colData(airway)$dex == "trt"]
assay(airway, "counts")["ENSG00000000003", airway$dex == "trt"]
airway |> filter(symbol == "TSPAN6", dex == "trt") |> pull(counts)
@mikelove
mikelove / banana.R
Created July 6, 2023 07:15
file to make UpSet plot of banana intersections
# scraped from https://doi.org/10.1038/nature11241
# note that intersections < 100 not included
banana_sets <- c(
Musa = 759,
Phoenix = 769,
Sorghum = 827,
Brachypodium = 387,
Ozyza = 1246,
Arabidopsis = 1187,
`Musa&Phoenix` = 467,
@mikelove
mikelove / E-P_pairs.R
Created June 16, 2023 06:35
New approach to computing correlations between pairs of overlapping features in terms of data matrices
library(plyranges)
set.seed(1)
x <- data.frame(seqnames=1, start=0:9 * 100 + 1,
width=20, id=1:10) %>%
as_granges()
y <- data.frame(seqnames=1, start=round(runif(4,100,900)),
width=10, id=letters[1:4]) %>%
as_granges() %>%
@mikelove
mikelove / deseq2_curves.R
Created June 15, 2023 06:17
Drawing DESeq2 fitted spline curves on top of scaled counts
library(splines)
library(DESeq2)
# make some demo data
dds <- makeExampleDESeqDataSet(n=100, m=40)
dds$condition <- sort(runif(40))
# make one gene where expression has a curve shape (just for demo)
s_shape <- round(500 * sin(dds$condition*2*pi) + 1000 + rnorm(40,0,50))
mode(s_shape) <- "integer"
counts(dds)[1,] <- s_shape
@mikelove
mikelove / wordcloud.R
Created June 14, 2023 07:26
word cloud for CSAMA
library(tm)
library(wordcloud)
crude <- scan("words", what="char", sep="\n")
crude <- gsub("/"," ",crude)
crude <- gsub("single cell","singlecell",crude)
crude <- Corpus(VectorSource(crude))
crude <- suppressWarnings(tm_map(crude, removePunctuation))
crude <- suppressWarnings(tm_map(crude, function(x) removeWords(x, stopwords())))
tdm <- TermDocumentMatrix(crude)
m <- as.matrix(tdm)
@mikelove
mikelove / ase_analysis_with_deseq2.R
Created June 5, 2023 17:55
Code for performing ASE analysis with DESeq2
design <- ~0 + donor + allele
dds <- DESeqDataSetFromMatrix(counts, coldata, design) # counts has the two alleles per donor
assays(dds)[["weights"]] <- weights # 1 for hets, 1e-6 for homs
sizeFactors(dds) <- rep(1, ncol(dds))
dds <- DESeq(dds, test="LRT", reduced=~0 + donor)
res <- results(dds)
@mikelove
mikelove / data_and_gene.R
Created May 3, 2023 19:50
Manhattan plot with genes using plotgardener
library(plotgardener)
par <- pgParams(
chrom = "chr1",
chromstart = 8.2e6,
chromend = 8.7e6,
assembly = "hg19",
just = c("left", "bottom")
)
dat <- data.frame(chrom="chr1", pos=840:849 * 1e4 + 1, p=1:10/10)
pageCreate(width = 4, height = 3.5, showGuides = TRUE)