Michael Love mikelove

## mpra_json.R
library(jsonlite)
library(readr)
# Read the designed-sequence metadata sheet (CSV export).
dat <- read_csv("IGVF MPRA FG designed sequence metadata - Sheet1.csv")

# Switch `constraints` to a list column so that single entries can be
# replaced by structured values.
dat$constraints <- as.list(dat$constraints)

# Entries 1 and 2 carry only the `required` flag.
dat$constraints[1:2] <- rep(list(list(required = TRUE)), 2)

# Entry 3 is required as well and additionally gets an `enum` vector: the
# text inside "enum: [...]" is pulled out of the original string and split
# on ", ".
dat$constraints[[3]] <- list(
  required = TRUE,
  enum = strsplit(
    sub(".*enum: \\[(.*)\\]", "\\1", dat$constraints[[3]]),
    ", "
  )[[1]]
)

## fluent-genomics-v2.qmd
---
title: "Differential chromatin accessibility and gene expression"
format: html
---

# Differential expression from RNA-seq

```{r}
#| eval: FALSE
dir <- system.file("extdata", package="macrophage")

## hg38_seqlens.tsv

          
chr1	248956422
chr2	242193529
chr3	198295559
chr4	190214555
chr5	181538259
chr6	170805979
chr7	159345973
chr8	145138636
chr9	138394717
chr10	133797422

## segment_example.R
library(plyranges)
library(nullranges)

# Thirty 100-bp ranges: on each of the sequences "1", "2" and "3", starts
# fall at 1, 1001, ..., 4001 and at 10001, 11001, ..., 14001.
x <- as_granges(
  data.frame(
    seqnames = rep(as.character(1:3), each = 10),
    start = rep(c(0:4, 10:14) * 1000 + 1, times = 3),
    width = 100
  )
)

# All three sequences are given the same length.
seqlengths(x) <- setNames(rep(20123, 3), as.character(1:3))

## join_se.R
library(SummarizedExperiment)
library(plyranges)

# Example data: a 100-row matrix filled with 600 standard-normal draws, and
# 50 width-5 ranges on sequence 1 starting at 2601, 2701, ..., labelled
# u001 to u050.
m <- matrix(rnorm(600), nrow = 100)
r1 <- as_granges(
  data.frame(
    seqnames = 1,
    start = 2501 + 100 * seq_len(50),
    width = 5,
    id1 = sprintf("u%03d", seq_len(50))
  )
)
r2 <- data.frame(seqnames=1, start=1:100 * 100 + 1,
                 width=5, id2=paste0("v",formatC(1:100,width=3,flag="0"))) |>

## 1_app.R
library(shiny)
library(UpSetR)
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(pheatmap)
# Read the ancestry table. No `delim` is passed, so readr is left to work
# out the field separator from the file contents.
dat <- read_delim("ancestry_dataframe.tsv")

ui <- fluidPage(

## frozen_vst.R
# Simulated counts: 2e5 negative binomial draws (mu = 100, size = 1/.01)
# arranged in 100 columns.
mat <- matrix(
  rnbinom(2e5, mu = 100, size = 1 / .01),
  ncol = 100
)

library(DESeq2)

# Column data holds a single constant covariate; the design is
# intercept-only.
d <- DESeqDataSetFromMatrix(
  countData = mat,
  colData = DataFrame(x = rep(1, 100)),
  design = ~1
)
# library size correction, centered log ratio to reference sample
d <- estimateSizeFactors(d)
# variance
d <- estimateDispersionsGeneEst(d)
# trend

## element_level.R
# Fix the RNG state so the simulated counts are reproducible.
set.seed(5)

n <- 1000
reps <- 10

# RNA counts: n * reps negative binomial draws (mu = 10, size = 100), laid
# out column by column as an n x reps matrix.
rna <- rnbinom(n * reps, mu = 10, size = 100)
dim(rna) <- c(n, reps)
dna <- matrix(
  rnbinom(n * reps, mu = 10, size = 100),

## R_Bioc_tidy_data.R
# dataframes vs lm S3 vs Bioc S4
# Michael Love
# Nov 1 2023

# A four-row data frame with genotype, count, score and gene columns.
dat <- data.frame(
  genotype = rep(c("wt", "mut"), each = 2),
  count = 10 * 1:4,
  score = c(-1.2, 0, 3.4, -5),
  gene = rep(c("Abc", "Xyz"), each = 2)
)
library(tibble)
# Display the same data as a tibble.
as_tibble(dat)

## tree_example.Rmd
---
title: "Toy tree example for collapsing"
author: "Michael Love"
---

Example data with 20 inferential replicates. Here we have just 1
sample per condition, and we calculate the LFC at each level of the
tree.

From the below simulation setup (see first chunk), the true DE signal
	library(jsonlite)
	library(readr)
	# Read the designed-sequence metadata sheet (CSV export).
	dat <- read_csv("IGVF MPRA FG designed sequence metadata - Sheet1.csv")
	# Make `constraints` a list column so entries can hold structured values.
	dat$constraints <- as.list(dat$constraints)
	# Entries 1 and 2 carry only the `required` flag.
	dat$constraints[[1]] <- list(required = TRUE)
	dat$constraints[[2]] <- list(required = TRUE)
	# Entry 3 also gets an `enum` vector taken from the "enum: [...]" text.
	# Both `.*` quantifiers are needed: the leading one drops whatever comes
	# before "enum: [", and the capturing one takes the whole bracket body
	# (a bare `.` would only ever match a single character, so multi-value
	# lists would be left unparsed).
	dat$constraints[[3]] <- list(
	  required = TRUE,
	  enum = strsplit(
	    sub(".*enum: \\[(.*)\\]", "\\1", dat$constraints[[3]]),
	    ", "
	  )[[1]]
	)
	---
	title: "Differential chromatin accessibility and gene expression"
	format: html
	---

	# Differential expression from RNA-seq

	```{r}
	#| eval: FALSE
	dir <- system.file("extdata", package="macrophage")
	chr1	248956422
	chr2	242193529
	chr3	198295559
	chr4	190214555
	chr5	181538259
	chr6	170805979
	chr7	159345973
	chr8	145138636
	chr9	138394717
	chr10	133797422
	library(plyranges)
	library(nullranges)

	# Thirty 100-bp ranges: on each of the sequences "1", "2" and "3", starts
	# fall at 1, 1001, ..., 4001 and at 10001, 11001, ..., 14001. The data
	# frame is handed to as_granges() with the native pipe `|>` (the escaped
	# form `\|>` is not valid R).
	x <- data.frame(
	  seqnames = rep(c("1", "2", "3"), each = 10),
	  start = rep(c(0:4, 10:14) * 1000 + 1, times = 3),
	  width = 100
	) |>
	  as_granges()

	# All three sequences are given the same length.
	seqlengths(x) <- c("1" = 20123, "2" = 20123, "3" = 20123)
	library(SummarizedExperiment)
	library(plyranges)

	# example data: a 100-row matrix filled with 600 standard-normal draws, and
	# 50 width-5 ranges on sequence 1 with ids u001 to u050
	m <- matrix(rnorm(600), nrow = 100)
	# The data frame is passed on with the native pipe `|>`; the escaped form
	# `\|>` does not parse as R.
	r1 <- data.frame(
	  seqnames = 1,
	  start = 1:50 * 100 + 2501,
	  width = 5,
	  id1 = paste0("u", formatC(1:50, width = 3, flag = "0"))
	) |>
	  as_granges()
	r2 <- data.frame(seqnames=1, start=1:100 * 100 + 1,
	width=5, id2=paste0("v",formatC(1:100,width=3,flag="0"))) \|>
	library(shiny)
	library(UpSetR)
	library(dplyr)
	library(tidyr)
	library(readr)
	library(ggplot2)
	library(pheatmap)
	# Read the ancestry table. No `delim` is passed, so readr is left to work
	# out the field separator from the file contents.
	dat <- read_delim("ancestry_dataframe.tsv")

	ui <- fluidPage(
	# Simulated counts: 2e5 negative binomial draws (mu = 100, size = 1/.01)
	# arranged in 100 columns.
	mat <- matrix(
	  rnbinom(2e5, mu = 100, size = 1 / .01),
	  ncol = 100
	)

	library(DESeq2)

	# Column data holds a single constant covariate; the design is
	# intercept-only.
	d <- DESeqDataSetFromMatrix(
	  countData = mat,
	  colData = DataFrame(x = rep(1, 100)),
	  design = ~1
	)
	# library size correction, centered log ratio to reference sample
	d <- estimateSizeFactors(d)
	# variance
	d <- estimateDispersionsGeneEst(d)
	# trend
	# Fix the RNG state so the simulated counts are reproducible.
	set.seed(5)

	n <- 1000
	reps <- 10

	# RNA counts: n * reps negative binomial draws (mu = 10, size = 100), laid
	# out column by column as an n x reps matrix.
	rna <- rnbinom(n * reps, mu = 10, size = 100)
	dim(rna) <- c(n, reps)
	dna <- matrix(
	rnbinom(n * reps, mu = 10, size = 100),
	# dataframes vs lm S3 vs Bioc S4
	# Michael Love
	# Nov 1 2023

	# A four-row data frame with genotype, count, score and gene columns.
	dat <- data.frame(
	  genotype = c("wt", "wt", "mut", "mut"),
	  count = c(10, 20, 30, 40),
	  score = c(-1.2, 0, 3.4, -5),
	  gene = c("Abc", "Abc", "Xyz", "Xyz")
	)
	library(tibble)
	# Display the data as a tibble, using the native pipe `|>` (the escaped
	# form `\|>` does not parse as R).
	dat |> as_tibble()
	---
	title: "Toy tree example for collapsing"
	author: "Michael Love"
	---

	Example data with 20 inferential replicates. Here we have just 1
	sample per condition, and we calculate the LFC at each level of the
	tree.

	From the below simulation setup (see first chunk), the true DE signal