Michael Love mikelove

## join_overlap_distance.R
library(plyranges)
# Two random sets of ten width-1 ranges on seqname 1. Start positions are
# drawn without replacement from 1..1000; `x` carries integer ids and `y`
# carries letter ids.
x <- as_granges(
  data.frame(
    seqnames = 1,
    start = sample(1000, size = 10, replace = FALSE),
    width = 1,
    id = 1:10
  )
)
y <- as_granges(
  data.frame(
    seqnames = 1,
    start = sample(1000, size = 10, replace = FALSE),
    width = 1,
    id = letters[1:10]
  )
)

## many_groups.R
library(plyranges)
library(microbenchmark)
library(dplyr)
library(tibble)

# Build a random set of N ranges.
#   N    - number of ranges to generate; also used as the Poisson mean for
#          both `start` and `width`.
#   grps - vector of group labels; each range gets one label sampled with
#          replacement, stored in the `grps` column.
# Each range also gets a seqname drawn from seq1/seq2/seq3, a strand drawn
# from +, - or *, and a uniform(0, 1) `score`. The data frame is then passed
# to as_granges().
# NOTE(review): the closing brace of this function is not visible in this
# excerpt, so the body may continue beyond the as_granges() call.
make_rand_gr <- function(N, grps) {
  data.frame(seqnames = sample(c("seq1", "seq2", "seq3"), N, replace = TRUE),
    strand = sample(c("+", "-", "*"), N, replace = TRUE), start = rpois(N,
      N), width = rpois(N, N), grps = sample(grps, N, replace = TRUE),
    score = runif(N)) %>% as_granges()

## bioc_meme.R
# Four spellings of "counts for one gene in the dex == 'trt' samples of
# `airway`", from most to least verbose.
# 1. Direct slot access with `@` at every level of the object.
airway@assays@data[["counts"]][airway@rowRanges@elementMetadata@listData$gene_id == "ENSG00000000003", airway@colData@listData$dex == "trt"]
# 2. Accessor functions: assays(), rowRanges(), colData().
assays(airway)[["counts"]][names(rowRanges(airway)) == "ENSG00000000003", colData(airway)$dex == "trt"]
# 3. assay() with the row selected by name and `$` applied to `airway` itself.
assay(airway, "counts")["ENSG00000000003", airway$dex == "trt"]
# 4. Native pipe with filter() / pull().
# NOTE(review): this form filters on symbol == "TSPAN6" rather than the
# Ensembl id used above -- presumably the same gene; confirm. filter()/pull()
# on `airway` presumably rely on a tidy-interface package loaded elsewhere.
airway |> filter(symbol == "TSPAN6", dex == "trt") |> pull(counts)

## banana.R
# scraped from https://doi.org/10.1038/nature11241
# note that intersections < 100 not included
# Named numeric vector of set sizes. Names containing `&` (for example
# `Musa&Phoenix`) are backquoted and presumably denote intersections.
# NOTE(review): the vector is cut off in this excerpt; the remaining entries
# and the closing parenthesis are not shown.
banana_sets <- c(
	Musa = 759,
	Phoenix = 769,
	Sorghum = 827,
	Brachypodium = 387,
	# NOTE(review): "Ozyza" looks like a typo for "Oryza". If it is renamed,
	# any intersection names that refer to it must change too -- TODO confirm.
	Ozyza = 1246,
	Arabidopsis = 1187,
	`Musa&Phoenix` = 467,

## E-P_pairs.R
library(plyranges)

# Fix the RNG seed so that later random draws are reproducible.
set.seed(1)
# `x`: ten width-20 ranges on seqname 1, starting at 1, 101, ..., 901.
x <- as_granges(
  data.frame(
    seqnames = 1,
    start = 0:9 * 100 + 1,
    width = 20,
    id = 1:10
  )
)

# `y`: four width-10 ranges on seqname 1 with ids "a".."d"; start positions
# are uniform draws on [100, 900], rounded to whole numbers.
# NOTE(review): the pipeline continues after as_granges(), but the next step
# is cut off in this excerpt.
y <- data.frame(seqnames=1, start=round(runif(4,100,900)),
                width=10, id=letters[1:4]) %>%
  as_granges() %>%

## deseq2_curves.R
library(splines)
library(DESeq2)

# Demo dataset with 100 rows and 40 columns.
dds <- makeExampleDESeqDataSet(n = 100, m = 40)
# Use a sorted continuous covariate drawn from uniform(0, 1) as `condition`.
dds$condition <- sort(runif(40))
# Give the first gene a curved profile (just for the demo): one sine period
# over the covariate with amplitude 500 around 1000, plus normal noise
# (sd 50), rounded and stored as integers.
s_shape <- as.integer(
  round(
    500 * sin(dds$condition * 2 * pi) + 1000 + rnorm(40, mean = 0, sd = 50)
  )
)
counts(dds)[1, ] <- s_shape

## wordcloud.R
library(tm)
library(wordcloud)
# Read one entry per line from the file "words".
crude <- scan("words", what = character(), sep = "\n")
# Replace slashes with spaces, then fuse "single cell" into a single token.
crude <- gsub("single cell", "singlecell", gsub("/", " ", crude))
# Build a corpus, then strip punctuation and stop words (warnings silenced).
crude <- Corpus(VectorSource(crude))
crude <- suppressWarnings(tm_map(crude, removePunctuation))
crude <- suppressWarnings(
  tm_map(crude, function(doc) removeWords(doc, stopwords()))
)
# Term-document matrix, plus a dense matrix copy of it.
tdm <- TermDocumentMatrix(crude)
m <- as.matrix(tdm)

## ase_analysis_with_deseq2.R
# Full model: no intercept, one term per donor, plus the allele effect.
design <- ~ 0 + donor + allele
# counts has the two alleles per donor
dds <- DESeqDataSetFromMatrix(
  countData = counts,
  colData = coldata,
  design = design
)
# weights: 1 for hets, 1e-6 for homs
assays(dds)[["weights"]] <- weights
# Set every size factor to 1.
sizeFactors(dds) <- rep(1, times = ncol(dds))
# Likelihood-ratio test of the full model against the donor-only model.
dds <- DESeq(dds, test = "LRT", reduced = ~ 0 + donor)
res <- results(dds)

## data_and_gene.R
library(plotgardener)
# Shared plotgardener parameters for the region chr1:8.2e6-8.7e6 on hg19,
# with "left"/"bottom" justification.
# Note: this object is named `par`, the same name as graphics::par().
par <- pgParams(
  chrom = "chr1", chromstart = 8.2e6, chromend = 8.7e6,
  assembly = "hg19", just = c("left", "bottom")
)
# Ten example points on chr1 at 8400001, 8410001, ..., 8490001 with
# p = 0.1, 0.2, ..., 1.
dat <- data.frame(
  chrom = "chr1",
  pos = 840:849 * 1e4 + 1,
  p = 1:10 / 10
)
# Open a 4 x 3.5 page with guides shown.
pageCreate(width = 4, height = 3.5, showGuides = TRUE)

## tstat.R
library(dplyr)
library(ggplot2)
library(tidyr)
library(purrr)
library(broom)
library(forcats)

# Example data frame: `value` holds 24 standard-normal draws; `type` is a
# factor built from 5 "A" followed by 3 "B" (8 labels).
# NOTE(review): the data.frame() call is cut off in this excerpt; any further
# columns and the closing parenthesis are not shown.
d <- data.frame(
  value = rnorm(24),
  type = factor(rep(c("A","B"), c(5,3))),
	library(plyranges)
	# Two random sets of ten width-1 ranges on seqname 1. Start positions are
	# drawn without replacement from 1..1000; `x` carries integer ids and `y`
	# carries letter ids.
	x <- as_granges(
	  data.frame(
	    seqnames = 1,
	    start = sample(1000, size = 10, replace = FALSE),
	    width = 1,
	    id = 1:10
	  )
	)
	y <- as_granges(
	  data.frame(
	    seqnames = 1,
	    start = sample(1000, size = 10, replace = FALSE),
	    width = 1,
	    id = letters[1:10]
	  )
	)
	library(plyranges)
	library(microbenchmark)
	library(dplyr)
	library(tibble)

	# Build a random set of N ranges.
	#   N    - number of ranges to generate; also used as the Poisson mean for
	#          both `start` and `width`.
	#   grps - vector of group labels; each range gets one label sampled with
	#          replacement, stored in the `grps` column.
	# Each range also gets a seqname drawn from seq1/seq2/seq3, a strand drawn
	# from +, - or *, and a uniform(0, 1) `score`. The data frame is then passed
	# to as_granges().
	# NOTE(review): the closing brace of this function is not visible in this
	# excerpt, so the body may continue beyond the as_granges() call.
	make_rand_gr <- function(N, grps) {
	data.frame(seqnames = sample(c("seq1", "seq2", "seq3"), N, replace = TRUE),
	strand = sample(c("+", "-", "*"), N, replace = TRUE), start = rpois(N,
	N), width = rpois(N, N), grps = sample(grps, N, replace = TRUE),
	score = runif(N)) %>% as_granges()
	# Four spellings of "counts for one gene in the dex == 'trt' samples of
	# `airway`", from most to least verbose.
	# 1. Direct slot access with `@` at every level of the object.
	airway@assays@data[["counts"]][airway@rowRanges@elementMetadata@listData$gene_id == "ENSG00000000003", airway@colData@listData$dex == "trt"]
	# 2. Accessor functions: assays(), rowRanges(), colData().
	assays(airway)[["counts"]][names(rowRanges(airway)) == "ENSG00000000003", colData(airway)$dex == "trt"]
	# 3. assay() with the row selected by name and `$` applied to `airway` itself.
	assay(airway, "counts")["ENSG00000000003", airway$dex == "trt"]
	# 4. Native pipe with filter() / pull(). The pipe operator is written `|>`;
	# a backslash in front of it is a syntax error in R.
	# NOTE(review): this form filters on symbol == "TSPAN6" rather than the
	# Ensembl id used above -- presumably the same gene; confirm.
	airway |> filter(symbol == "TSPAN6", dex == "trt") |> pull(counts)
	# scraped from https://doi.org/10.1038/nature11241
	# note that intersections < 100 not included
	# Named numeric vector of set sizes. Names containing `&` (for example
	# `Musa&Phoenix`) are backquoted and presumably denote intersections.
	# NOTE(review): the vector is cut off in this excerpt; the remaining entries
	# and the closing parenthesis are not shown.
	banana_sets <- c(
	Musa = 759,
	Phoenix = 769,
	Sorghum = 827,
	Brachypodium = 387,
	# NOTE(review): "Ozyza" looks like a typo for "Oryza". If it is renamed,
	# any intersection names that refer to it must change too -- TODO confirm.
	Ozyza = 1246,
	Arabidopsis = 1187,
	`Musa&Phoenix` = 467,
	library(plyranges)

	# Fix the RNG seed so that later random draws are reproducible.
	set.seed(1)
	# `x`: ten width-20 ranges on seqname 1, starting at 1, 101, ..., 901.
	x <- as_granges(
	  data.frame(
	    seqnames = 1,
	    start = 0:9 * 100 + 1,
	    width = 20,
	    id = 1:10
	  )
	)

	# `y`: four width-10 ranges on seqname 1 with ids "a".."d"; start positions
	# are uniform draws on [100, 900], rounded to whole numbers.
	# NOTE(review): the pipeline continues after as_granges(), but the next step
	# is cut off in this excerpt.
	y <- data.frame(seqnames=1, start=round(runif(4,100,900)),
	width=10, id=letters[1:4]) %>%
	as_granges() %>%
	library(splines)
	library(DESeq2)

	# make some demo data: 100 rows and 40 columns
	dds <- makeExampleDESeqDataSet(n=100, m=40)
	# use a sorted continuous covariate drawn from uniform(0, 1) as `condition`
	dds$condition <- sort(runif(40))
	# make one gene where expression has a curve shape (just for demo):
	# one sine period over the covariate (amplitude 500 around 1000) plus
	# normal noise (sd 50). The sine argument is condition * 2 * pi; without
	# the multiplication signs the expression would look up a column named
	# `condition2pi`, which is never created.
	s_shape <- round(500 * sin(dds$condition*2*pi) + 1000 + rnorm(40,0,50))
	# counts are stored as integers
	mode(s_shape) <- "integer"
	counts(dds)[1,] <- s_shape
	library(tm)
	library(wordcloud)
	# Read one entry per line from the file "words".
	crude <- scan("words", what = character(), sep = "\n")
	# Replace slashes with spaces, then fuse "single cell" into a single token.
	crude <- gsub("single cell", "singlecell", gsub("/", " ", crude))
	# Build a corpus, then strip punctuation and stop words (warnings silenced).
	crude <- Corpus(VectorSource(crude))
	crude <- suppressWarnings(tm_map(crude, removePunctuation))
	crude <- suppressWarnings(
	  tm_map(crude, function(doc) removeWords(doc, stopwords()))
	)
	# Term-document matrix, plus a dense matrix copy of it.
	tdm <- TermDocumentMatrix(crude)
	m <- as.matrix(tdm)
	# Full model: no intercept, one term per donor, plus the allele effect.
	design <- ~ 0 + donor + allele
	# counts has the two alleles per donor
	dds <- DESeqDataSetFromMatrix(
	  countData = counts,
	  colData = coldata,
	  design = design
	)
	# weights: 1 for hets, 1e-6 for homs
	assays(dds)[["weights"]] <- weights
	# Set every size factor to 1.
	sizeFactors(dds) <- rep(1, times = ncol(dds))
	# Likelihood-ratio test of the full model against the donor-only model.
	dds <- DESeq(dds, test = "LRT", reduced = ~ 0 + donor)
	res <- results(dds)
	library(plotgardener)
	# Shared plotgardener parameters for the region chr1:8.2e6-8.7e6 on hg19,
	# with "left"/"bottom" justification.
	# Note: this object is named `par`, the same name as graphics::par().
	par <- pgParams(
	  chrom = "chr1", chromstart = 8.2e6, chromend = 8.7e6,
	  assembly = "hg19", just = c("left", "bottom")
	)
	# Ten example points on chr1 at 8400001, 8410001, ..., 8490001 with
	# p = 0.1, 0.2, ..., 1.
	dat <- data.frame(
	  chrom = "chr1",
	  pos = 840:849 * 1e4 + 1,
	  p = 1:10 / 10
	)
	# Open a 4 x 3.5 page with guides shown.
	pageCreate(width = 4, height = 3.5, showGuides = TRUE)
	library(dplyr)
	library(ggplot2)
	library(tidyr)
	library(purrr)
	library(broom)
	library(forcats)

	d <- data.frame(
	value = rnorm(24),
	type = factor(rep(c("A","B"), c(5,3))),