Last active
March 26, 2022 15:36
-
-
Save mikelove/a8a86f2d020ebd33230c7bff9b26eece to your computer and use it in GitHub Desktop.
airpart demo code for fly cross
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(SingleCellExperiment) | |
sce <- readRDS("sce_metaCells.rds") | |
dim(sce) | |
assayNames(sce) | |
a1 <- as.matrix( round(assay(sce, "dmel_counts")) ) | |
a2 <- as.matrix( round(assay(sce, "counts") - a1) ) | |
a1[is.na(a1)] <- 0 | |
a2[is.na(a2)] <- 0 | |
assay(sce, "a1") <- a1 | |
assay(sce, "a2") <- a2 | |
library(scater) | |
plotReducedDim(sce, dimred="PCA", colour_by="label") | |
library(airpart) | |
sce$x <- colLabels(sce) | |
sce$original <- colLabels(sce) # save for later | |
sce <- preprocess(sce) | |
top <- head(order(rowSums(assay(sce, "counts")),decreasing=TRUE),100) | |
makeHeatmap(sce[top,]) | |
rmu <- rowMeans(assay(sce, "ratio_pseudo")) | |
rvar <- rowVars(assay(sce, "ratio_pseudo")) | |
plot(rmu, rvar) | |
idx <- rmu > .2 & rmu < .8 | |
sce <- sce[idx,] | |
# go back and re-cluster by total count | |
library(scran) | |
library(bluster) | |
bp <- SNNGraphParam(k=3, type="rank", cluster.fun="walktrap") # makes 9 | |
bp <- SNNGraphParam(k=2, type="rank", cluster.fun="walktrap") # makes 16 | |
nn.clusters <- clusterCells(sce, use.dimred="PCA", BLUSPARAM=bp) | |
table(nn.clusters) | |
colLabels(sce) <- nn.clusters | |
plotReducedDim(sce, dimred="PCA", colour_by="label") | |
# visualize | |
sce$x <- colLabels(sce) | |
sce <- sce[,order(sce$x)] | |
top <- head(order(rowSums(assay(sce, "counts")),decreasing=TRUE),200) | |
top <- head(order(rowVars(assay(sce, "ratio_pseudo")),decreasing=TRUE),400) | |
makeHeatmap(sce[top,]) | |
sce_cl <- sce[top,] | |
# find clusters of genes | |
sce_cl <- geneCluster(sce_cl, G=4) | |
sce_cl <- geneCluster(sce_cl, G=6) | |
makeHeatmap(sce_cl, genecluster=1) | |
summary <- summaryAllelicRatio(sce_cl) | |
summary[[1]] | |
# binomial is ok when theta is ~20 and larger | |
estDisp(sce_cl, genecluster=2, type="values") | |
# ~5 seconds on my machine | |
system.time({ | |
sce_sub <- fusedLasso(sce_cl, | |
model = "binomial", | |
genecluster = 2, ncores = 1) | |
}) | |
system.time({ | |
sce_sub <- fusedLasso(sce_cl, | |
model = "gaussian", | |
genecluster = 2, ncores = 1) | |
}) | |
metadata(sce_sub)$partition | |
makeHeatmap(sce_sub) | |
sce_sub <- allelicRatio(sce_sub) | |
makeViolin(sce_sub, ylim=c(.5,.85)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(SingleCellExperiment) | |
sce <- readRDS("sce_top200cells_top2000genes_variance.rds") | |
assayNames(sce) | |
plot(assay(sce, "mel_counts")[1,], assay(sce, "mel_var")[1,]) | |
library(scater) | |
plotReducedDim(sce, dimred="PCA", colour_by="label") | |
library(scran) | |
library(bluster) | |
bp <- SNNGraphParam(k=10, type="rank", cluster.fun="walktrap") # makes 6 | |
nn.clusters <- clusterCells(sce, use.dimred="PCA", BLUSPARAM=bp) | |
table(nn.clusters) | |
colLabels(sce) <- nn.clusters | |
plotReducedDim(sce, dimred="PCA", colour_by="label") | |
cols <- c("lightblue1", "goldenrod1", "royalblue1", "salmon1", "orchid1", "limegreen") | |
boxplot(assay(sce, "ratio")[1,] ~ colLabels(sce), col=cols, ylab="ratio", xlab="cluster") | |
drawAlleles <- function(i) { | |
par(mfrow=c(1,2)) | |
plot(assay(sce, "mel_counts")[i,], assay(sce, "mel_var")[i,], col=cols[colLabels(sce)], | |
xlab="Melanogaster count", ylab="boot variance", main=rownames(sce)[i]) | |
abline(0,1) | |
plot(assay(sce, "sec_counts")[i,], assay(sce, "sec_var")[i,], col=cols[colLabels(sce)], | |
xlab="Sechellia count", ylab="boot variance", main=rownames(sce)[i]) | |
abline(0,1) | |
} | |
library(genefilter) | |
ratio <- as.matrix(assay(sce, "ratio")) | |
ratio[is.nan(ratio)] <- 0.5 | |
f <- rowFtests(ratio, colLabels(sce))[,1] | |
mel_rv <- rowMeans(assay(sce, "mel_var")/(assay(sce, "mel_counts")+1)) | |
sec_rv <- rowMeans(assay(sce, "sec_var")/(assay(sce, "sec_counts")+1)) | |
rv <- .5*mel_rv + .5*sec_rv | |
rm <- rowMeans(assay(sce)) | |
plot(rv, f) | |
idx <- f > 20 | rv > 1 | |
text(rv[idx], f[idx], rownames(ratio)[idx], pos=1) | |
plot(rv, f, xlim=c(.8,2), ylim=c(-2,20)) | |
idx <- f > 20 | rv > 1 | |
text(rv[idx], f[idx], rownames(ratio)[idx], pos=1) | |
plot(rm, rv, xlim=c(10,50)) | |
which(abs(rm - 40) < 1) | |
drawAlleles(1) | |
which(rownames(sce) == "Bacc") | |
boxplot(assay(sce, "ratio")[28,] ~ colLabels(sce), col=cols, ylab="ratio", xlab="cluster") | |
drawAlleles(28) | |
which(rownames(sce) == "14-3-3zeta") | |
boxplot(assay(sce, "ratio")[42,] ~ colLabels(sce), col=cols, ylab="ratio", xlab="cluster") | |
drawAlleles(42) | |
library(fishpond) | |
sce_mel <- sce | |
assays(sce_mel) <- assays(sce_mel)[c("counts","mel_counts","mel_var")] | |
assayNames(sce_mel) <- c("counts","mean","variance") | |
sce_sec <- sce | |
assays(sce_sec) <- assays(sce_sec)[c("counts","sec_counts","sec_var")] | |
assayNames(sce_sec) <- c("counts","mean","variance") | |
par(mfrow=c(1,2), mar=c(5,5,2,1)) | |
plotInfReps(sce_mel, 37, x="label", thin=2, xlab="Melanogaster", ylim=c(0,80)) | |
par(mar=c(5,2,2,1)) | |
plotInfReps(sce_sec, 37, x="label", thin=2, xlab="Sechellia", ylim=c(0,80)) | |
par(mfrow=c(1,2), mar=c(5,5,2,1)) | |
plotInfReps(sce_mel, 42, x="label", thin=2, xlab="Melanogaster") | |
par(mar=c(5,2,2,1)) | |
plotInfReps(sce_sec, 42, x="label", thin=2, xlab="Sechellia") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment