Skip to content

Instantly share code, notes, and snippets.

View al2na's full-sized avatar
💭
fishing 🎣

Altuna Akalin al2na

💭
fishing 🎣
View GitHub Profile
@al2na
al2na / read.BSMAP.ratio.methylKit.R
Created October 5, 2012 17:50
reading BSMAP ratio files
library(methylKit)
obj=read("bsmap_output.txt",sample.id="test",assembly="mm9",header=TRUE, context="CpG", resolution="base",
pipeline=list(fraction=TRUE,chr.col=1,start.col=2,end.col=2,
coverage.col=6,strand.col=3,freqC.col=5 )
)
obj
library(sqldf)
library(doBy)
library(plyr)
library(data.table)
n<-100000
grp1<-sample(1:750, n, replace=T)
grp2<-sample(1:750, n, replace=T)
d<-data.frame(x=rnorm(n), y=rnorm(n), grp1=grp1, grp2=grp2, n,
replace=T)
@al2na
al2na / read.rrbs.bed.R
Last active December 14, 2015 10:49
read ENCODE rrbs bed file
library(methylKit)
obj = read("wgEncodeHaibMethylRrbsA549Dm002p7dHaibSitesRep1.bed.gz",
sample.id = "test", assembly = "hg19", header = FALSE,
context = "CpG", resolution = "base",
pipeline = list(fraction = FALSE, chr.col = 1, start.col = 3,
end.col = 3,coverage.col = 5, strand.col = 6,
freqC.col = 11))
@al2na
al2na / read.bed.files.rrbs.R
Created March 3, 2013 07:55
read multiple rrbs bed files
file.list = list("wgEncodeHaibMethylRrbsA549Dm002p7dHaibSitesRep1.bed.gz",
"wgEncodeHaibMethylRrbsAg04449UwstamgrowprotSitesRep1.bed9.gz")
obj = read(file.list, sample.id = list("test", "control"), treatment = c(1,0),
assembly = "hg19", header = FALSE, context = "CpG", resolution = "base",
pipeline = list(fraction = FALSE, chr.col = 1, start.col = 3, end.col = 3,
coverage.col = 5, strand.col = 6, freqC.col = 11))
obj
## methylRawList object with 2 methylRaw objects
##
## methylRaw object with 1464031 rows

Here is how you can make a table in GFM format using knitr + ascii

render_gfm()
gfm_table <- function(x, ...) {
    require(ascii)
    y <- capture.output(print(ascii(x, ...), type = "org"))
 # substitute + with | for table markup
@al2na
al2na / summarizeScoresOnRegions.R
Last active March 23, 2020 22:20
getting average score of a GRanges over another GRanges object. summarizeScores(loc.gr,score.gr,score.col)
.libPaths("/work2/gschub/altuna/RlibsDev")
require(data.table)
summarizeScores<-function(loc.gr,score.gr,score.col){
ov <- as.matrix(findOverlaps(loc.gr,score.gr))
dt=data.table(id=ov[,1],score=values(score.gr)[ov[,2],which(names(values(score.gr))==score.col)])
dt=dt[,list(av=mean(score)),by=id]
@al2na
al2na / getCMethMatrix.R
Created November 11, 2013 15:13
getting a matrix methylation status matrix via QuasR
require(data.table)
require(QuasR)
# arguments:
# proj: qProject object
# range: GRanges object with ONE!!!! range
# samp: sample.name
#
getCMethMatrix<-function(proj,range,samp){
Cs=qMeth(proj, query=range,mode="allC",reportLevel="alignment")
@al2na
al2na / tileMethylCounts2.R
Last active December 30, 2015 21:49
a version of tileMethylCounts where the chromosome names obtained from GRanges out of unique seqnames, so there will be no chr names that do not have coverage. see the discussion here: https://groups.google.com/forum/#!topic/methylkit_discussion/75y6dbrbCWI
setGeneric("tileMethylCounts2",
function(object,win.size=1000,step.size=1000,cov.bases=0)
standardGeneric("tileMethylCounts2") )
setMethod("tileMethylCounts2", signature(object="methylRaw"),
function(object,win.size,step.size,cov.bases){
g.meth =as(object,"GRanges")
chrs =as.character(unique(seqnames(g.meth)))
widths =seqlengths(g.meth)
@al2na
al2na / callPrimer3.R
Last active December 11, 2021 19:48
calling primer3 from R
#' call primer3 for a given set of DNAstringSet object
#'
#' @param seq DNAstring object, one DNA string for the given amplicon
#' @param size_range default: '151-500'
#' @param Tm melting temprature parameters default:c(55,57,58)
#' @param name name of the amplicon in chr_start_end format
#' @param primer3 primer3 location
#' @param therme.param thermodynamic parameters folder
#' @param settings text file for parameters
#' @author Altuna Akalin modified Arnaud Krebs' original function
@al2na
al2na / getOE.R
Last active January 4, 2016 02:59
get O/E ration and CpG ratio and GC content for given GRanges object and BSgenome. Returns a matrix of O/E ratio, CpGcontent and GC content
require(GenomicRanges)
require(Biostrings)
#' get OE ratio and GC content for a given set of DNAstrings
getOE.strset<-function(str.set)
{
di.mat=dinucleotideFrequency( str.set )
a.mat =alphabetFrequency( str.set ,baseOnly=TRUE )
exp=(a.mat[,2]*a.mat[,3])/width(str.set )