Michael Love mikelove

## conditional_scz_gwas_2022_hg19.csv
Index CHR,Index SNP,BP Left,BP Right,# Conditional independent,Index BP,Index P value,Merged Indexes,2nd SNP,2nd BP,2nd P value,3rd SNP,3rd BP,3rd P value,4th SNP,4th BP,4th P value,5th SNP,5th BP,5th P value,,,,,,,,
6,rs13195636,24939493,30559493,4,27509493,3.33E-40,rs13195636,rs9461856,33395199,4.13E-14,rs8192589,32187637,2.27E-10,rs9368789,34001678,2.03E-07,rs10946808,26233387,1.11E-06,,,,,,,,
18,rs9636107,52666708,53680258,4,53200117,1.92E-13,"rs12969453,rs74914300,rs72934602,rs9636107",rs12969453,52751708,3.93E-11,rs78322266,53063676,6.89E-08,rs72934602,53568458,6.79E-07,rs9952704,53447690,2.60E-04,,,,,,,,
3,rs2710323,48132866,53603354,3,52815905,1.02E-21,"rs11715134,rs11917680,rs2710323,rs13080668",rs2236989,50505395,1.81E-10,rs7633840,48719638,4.41E-07,rs4687724,53408177,1.10E-04,,,,,,,,,,,
18,rs72980087,77324421,77981194,2,77632194,4.80E-16,"rs56040937,rs7238071,rs72980087,rs72970145",rs11664298,77578986,5.24E-16,rs12455965,77688830,0.0001109,,,,,,,,,,,,,,
3,rs13090130,161092491,161969035,2,161777035,

## join_overlap_distance.R
library(plyranges)
x <- data.frame(seqnames=1,
                start=sample(1000,10,FALSE),
                width=1, id=1:10) %>%
  as_granges()
y <- data.frame(seqnames=1,
                start=sample(1000,10,FALSE),
                width=1, id=letters[1:10]) %>%
  as_granges()

## many_groups.R
library(plyranges)
library(microbenchmark)
library(dplyr)
library(tibble)

make_rand_gr <- function(N, grps) {
  data.frame(seqnames = sample(c("seq1", "seq2", "seq3"), N, replace = TRUE),
    strand = sample(c("+", "-", "*"), N, replace = TRUE), start = rpois(N,
      N), width = rpois(N, N), grps = sample(grps, N, replace = TRUE),
    score = runif(N)) %>% as_granges()

## bioc_meme.R
airway@assays@data[["counts"]][airway@rowRanges@elementMetadata@listData$gene_id == "ENSG00000000003", airway@colData@listData$dex == "trt"]
assays(airway)[["counts"]][names(rowRanges(airway)) == "ENSG00000000003", colData(airway)$dex == "trt"]
assay(airway, "counts")["ENSG00000000003", airway$dex == "trt"]
airway |> filter(symbol == "TSPAN6", dex == "trt") |> pull(counts)

## banana.R
# scraped from https://doi.org/10.1038/nature11241
# note that intersections < 100 not included
banana_sets <- c(
	Musa = 759,
	Phoenix = 769,
	Sorghum = 827,
	Brachypodium = 387,
	Ozyza = 1246,
	Arabidopsis = 1187,
	`Musa&Phoenix` = 467,

## E-P_pairs.R
library(plyranges)

set.seed(1)
x <- data.frame(seqnames=1, start=0:9 * 100 + 1,
                width=20, id=1:10) %>%
  as_granges()

y <- data.frame(seqnames=1, start=round(runif(4,100,900)),
                width=10, id=letters[1:4]) %>%
  as_granges() %>%

## deseq2_curves.R
library(splines)
library(DESeq2)

# make some demo data
dds <- makeExampleDESeqDataSet(n=100, m=40)
dds$condition <- sort(runif(40))
# make one gene where expression has a curve shape (just for demo)
s_shape <- round(500 * sin(dds$condition*2*pi) + 1000 + rnorm(40,0,50))
mode(s_shape) <- "integer"
counts(dds)[1,] <- s_shape

## wordcloud.R
library(tm)
library(wordcloud)
crude <- scan("words", what="char", sep="\n")
crude <- gsub("/"," ",crude)
crude <- gsub("single cell","singlecell",crude)
crude <- Corpus(VectorSource(crude))
crude <- suppressWarnings(tm_map(crude, removePunctuation))
crude <- suppressWarnings(tm_map(crude, function(x) removeWords(x, stopwords())))
tdm <- TermDocumentMatrix(crude)
m <- as.matrix(tdm)

## ase_analysis_with_deseq2.R
design <- ~0 + donor + allele
dds <- DESeqDataSetFromMatrix(counts, coldata, design) # counts has the two alleles per donor
assays(dds)[["weights"]] <- weights # 1 for hets, 1e-6 for homs
sizeFactors(dds) <- rep(1, ncol(dds))
dds <- DESeq(dds, test="LRT", reduced=~0 + donor)
res <- results(dds)

## data_and_gene.R
library(plotgardener)
par <- pgParams(
  chrom = "chr1",
  chromstart = 8.2e6,
  chromend = 8.7e6,
  assembly = "hg19",
  just = c("left", "bottom")
)
dat <- data.frame(chrom="chr1", pos=840:849 * 1e4 + 1, p=1:10/10)
pageCreate(width = 4, height = 3.5, showGuides = TRUE)
	Index CHR,Index SNP,BP Left,BP Right,# Conditional independent,Index BP,Index P value,Merged Indexes,2nd SNP,2nd BP,2nd P value,3rd SNP,3rd BP,3rd P value,4th SNP,4th BP,4th P value,5th SNP,5th BP,5th P value,,,,,,,,
	6,rs13195636,24939493,30559493,4,27509493,3.33E-40,rs13195636,rs9461856,33395199,4.13E-14,rs8192589,32187637,2.27E-10,rs9368789,34001678,2.03E-07,rs10946808,26233387,1.11E-06,,,,,,,,
	18,rs9636107,52666708,53680258,4,53200117,1.92E-13,"rs12969453,rs74914300,rs72934602,rs9636107",rs12969453,52751708,3.93E-11,rs78322266,53063676,6.89E-08,rs72934602,53568458,6.79E-07,rs9952704,53447690,2.60E-04,,,,,,,,
	3,rs2710323,48132866,53603354,3,52815905,1.02E-21,"rs11715134,rs11917680,rs2710323,rs13080668",rs2236989,50505395,1.81E-10,rs7633840,48719638,4.41E-07,rs4687724,53408177,1.10E-04,,,,,,,,,,,
	18,rs72980087,77324421,77981194,2,77632194,4.80E-16,"rs56040937,rs7238071,rs72980087,rs72970145",rs11664298,77578986,5.24E-16,rs12455965,77688830,0.0001109,,,,,,,,,,,,,,
	3,rs13090130,161092491,161969035,2,161777035,
	library(plyranges)
	x <- data.frame(seqnames=1,
	start=sample(1000,10,FALSE),
	width=1, id=1:10) %>%
	as_granges()
	y <- data.frame(seqnames=1,
	start=sample(1000,10,FALSE),
	width=1, id=letters[1:10]) %>%
	as_granges()
	library(plyranges)
	library(microbenchmark)
	library(dplyr)
	library(tibble)

	make_rand_gr <- function(N, grps) {
	data.frame(seqnames = sample(c("seq1", "seq2", "seq3"), N, replace = TRUE),
	strand = sample(c("+", "-", "*"), N, replace = TRUE), start = rpois(N,
	N), width = rpois(N, N), grps = sample(grps, N, replace = TRUE),
	score = runif(N)) %>% as_granges()
	airway@assays@data[["counts"]][airway@rowRanges@elementMetadata@listData$gene_id == "ENSG00000000003", airway@colData@listData$dex == "trt"]
	assays(airway)[["counts"]][names(rowRanges(airway)) == "ENSG00000000003", colData(airway)$dex == "trt"]
	assay(airway, "counts")["ENSG00000000003", airway$dex == "trt"]
	airway \|> filter(symbol == "TSPAN6", dex == "trt") \|> pull(counts)
	# scraped from https://doi.org/10.1038/nature11241
	# note that intersections < 100 not included
	banana_sets <- c(
	Musa = 759,
	Phoenix = 769,
	Sorghum = 827,
	Brachypodium = 387,
	Ozyza = 1246,
	Arabidopsis = 1187,
	`Musa&Phoenix` = 467,
	library(plyranges)

	set.seed(1)
	x <- data.frame(seqnames=1, start=0:9 * 100 + 1,
	width=20, id=1:10) %>%
	as_granges()

	y <- data.frame(seqnames=1, start=round(runif(4,100,900)),
	width=10, id=letters[1:4]) %>%
	as_granges() %>%
	library(splines)
	library(DESeq2)

	# make some demo data
	dds <- makeExampleDESeqDataSet(n=100, m=40)
	dds$condition <- sort(runif(40))
	# make one gene where expression has a curve shape (just for demo)
	s_shape <- round(500 * sin(dds$condition2pi) + 1000 + rnorm(40,0,50))
	mode(s_shape) <- "integer"
	counts(dds)[1,] <- s_shape
	library(tm)
	library(wordcloud)
	crude <- scan("words", what="char", sep="\n")
	crude <- gsub("/"," ",crude)
	crude <- gsub("single cell","singlecell",crude)
	crude <- Corpus(VectorSource(crude))
	crude <- suppressWarnings(tm_map(crude, removePunctuation))
	crude <- suppressWarnings(tm_map(crude, function(x) removeWords(x, stopwords())))
	tdm <- TermDocumentMatrix(crude)
	m <- as.matrix(tdm)
	design <- ~0 + donor + allele
	dds <- DESeqDataSetFromMatrix(counts, coldata, design) # counts has the two alleles per donor
	assays(dds)[["weights"]] <- weights # 1 for hets, 1e-6 for homs
	sizeFactors(dds) <- rep(1, ncol(dds))
	dds <- DESeq(dds, test="LRT", reduced=~0 + donor)
	res <- results(dds)
	library(plotgardener)
	par <- pgParams(
	chrom = "chr1",
	chromstart = 8.2e6,
	chromend = 8.7e6,
	assembly = "hg19",
	just = c("left", "bottom")
	)
	dat <- data.frame(chrom="chr1", pos=840:849 * 1e4 + 1, p=1:10/10)
	pageCreate(width = 4, height = 3.5, showGuides = TRUE)