David Ruau bobthecat

## rcpp_speed.r
library(rbenchmark)
# Benchmark cosine() against cosineRcpp() on the same input `mat`.
# `cosine`, `cosineRcpp` and `mat` must already exist when this runs.
# The result table is restricted to the listed columns and sorted by the
# "relative" column.
res <- benchmark(
  cosine(mat),
  cosineRcpp(mat),
  columns = c("test", "replications", "elapsed",
              "relative", "user.self", "sys.self"),
  order = "relative",
  replications = 1000
)

res
# Console output (header row only; kept as a comment so the script parses):
#              test replications elapsed relative user.self sys.self

## tweet_air_pol.r
library(twitteR)
library(ggplot2)
library(grid)

# Request up to 3200 tweets from the 'BeijingAir' timeline.
pol <- userTimeline(
  'BeijingAir',
  n = 3200
)
# Number of tweets actually returned
length(pol)
# 3200

myGrep <- function(x){

## tweet_air_pol.r
library(twitteR)
library(ggplot2)
library(grid)

# Request up to 3200 tweets from the 'BeijingAir' timeline.
pol <- userTimeline(
  'BeijingAir',
  n = 3200
)
# Number of tweets actually returned
length(pol)
# 3200

myGrep <- function(x){

## expr_matrix.r
library(GEOquery)
library(RankProd)
library(mouse4302.db)
## Download the data from GEO
gse12499 <- getGEO("GSE12499", GSEMatrix = TRUE)
# Take the first element of the getGEO() result and extract its expression
# values with exprs().
e <- exprs(gse12499[[1]])
dim(e)
# Console output (kept as a comment so the script parses):
# [1] 45101    10

## r-tutorial.r
### R code from vignette source 'Presentation.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: init
###################################################
# Set the `width` option (line width used when printing) to 60.
options(width = 60)


###################################################

## R_tut_2.r
### R code from vignette source 'Presentation_2.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: init
###################################################
# Set the `width` option (line width used when printing) to 60.
options(width = 60)


###################################################

## religion_index.r
library(xlsx)
library(googleVis)
# I downloaded the Excel file, cleaned the headers and reworked the column
# titles a bit.
# The first sheet is selected by position, so `sheetIndex` is used rather than
# passing a number through `sheetName`.
da <- read.xlsx("~/Downloads/religion.xlsx", sheetIndex = 1)
# Use the country column as row names, then drop that first column.
rownames(da) <- da$COUNTRY.
# drop = FALSE keeps `da` a data frame whatever the number of columns left.
da <- da[, -1, drop = FALSE]
religion <- data.frame(country=rep(rownames(da), 3),
  year=c(rep(2007, dim(da)[1]), rep(2009, dim(da)[1]), rep(2010, dim(da)[1])),
  GRI=c(da$GRI_2007, da$GRI_2009, da$GRI_2010),

## getting_some_data.r
library(GEOquery)
## Download the data from GEO
GDS3716 <- getGEO("GDS3716")
# Transform the GDS into an ExpressionSet
eset <- GDS2eSet(GDS3716, do.log2 = TRUE)
phenoData <- pData(eset)
# Keep only the samples whose specimen description contains "ER" (ER+ and ER-)
samples <- phenoData$sample[grep("ER", phenoData$specimen)]
# Subset the ExpressionSet by sample name. `samples` is converted to character
# first: if it is a factor, using it directly as an index would select columns
# by its integer codes instead of by its labels.
eset <- eset[, as.character(samples)]

## empirical_FDR.r
## EXTRACTING CLASS LABELS
# Keep the specimen descriptions containing "ER" and reduce each one to the
# text found between the leading "ER" and " breast cancer".
# `phenoData` must already exist when this runs.
classLabel <- sub("^ER(.*) breast cancer", "\\1",
                  grep("ER", phenoData$specimen, value = TRUE))
classLabel
# Console output (kept as a comment so the script parses):
# [1] "-" "-" "-" "-" "-" "-" "-" "-" "-" "+" "+" "+" "+" "+" "+" "+" "+" "+"

## COMPUTING P-VALUE DISTRIBUTION
# Column positions of the "-" and "+" samples
minus <- which(classLabel == "-")
plus <- which(classLabel == "+")
# One t-test per row of `e`, comparing the "-" columns with the "+" columns;
# `p` holds the resulting p-values. `e` must already exist when this runs.
p <- apply(e, 1, function(x) {
  t.test(as.numeric(x[minus]), as.numeric(x[plus]))$p.value
})

## empirical_FDR.r
## LOADING LIBRARIES FOR PARALLEL PROCESSING
library(doMC)
# detectCores() is taken from the `parallel` package that ships with R,
# instead of reaching into the internals of `multicore` with `:::`.
ncore <- parallel::detectCores()
# Register the doMC backend with that many cores for %dopar% loops.
registerDoMC(cores = ncore)

## COMPUTING THE RANDOM P-VALUE DISTRIBUTION
# Number of random samplings
R <- 100
# Shuffling the sample labels and recomputing the p-value each time
p.rand <- foreach(i = 1:dim(e)[1], .combine=rbind) %dopar% {
	library(rbenchmark)
	res <- benchmark( cosine(mat),
	cosineRcpp(mat),
	columns=c("test", "replications", "elapsed",
	"relative", "user.self", "sys.self"),
	order="relative",
	replications=1000)

	res
	test replications elapsed relative user.self sys.self
	library(twitteR)
	library(ggplot2)
	library(grid)

	# download all that you can
	pol <- userTimeline('BeijingAir', n=3200)
	length(pol)
	# 3200

	myGrep <- function(x){
	library(GEOquery); library(RankProd); library(mouse4302.db)
	## Download the data from GEO
	gse12499 <- getGEO('GSE12499',GSEMatrix=TRUE)
	e <- exprs(gse12499[[1]])
	dim(e)
	[1] 45101 10
	### R code from vignette source 'Presentation.Rnw'
	### Encoding: UTF-8

	###################################################
	### code chunk number 1: init
	###################################################
	options(width=60)


	###################################################
	library(xlsx)
	library(googleVis)
	# I downloaded the Excel file, cleaned the headers and worked a bit
	# the column title.
	da <- read.xlsx("~/Downloads/religion.xlsx", sheetName=1)
	rownames(da) <- da$COUNTRY.
	da <- da[,-1]
	religion <- data.frame(country=rep(rownames(da), 3),
	year=c(rep(2007, dim(da)[1]), rep(2009, dim(da)[1]), rep(2010, dim(da)[1])),
	GRI=c(da$GRI_2007, da$GRI_2009, da$GRI_2010),
	library(GEOquery)
	## Download the data from GEO
	GDS3716 <- getGEO('GDS3716')
	# transform the GDS to and expressionSet
	eset <- GDS2eSet(GDS3716,do.log2=TRUE)
	phenoData <- pData(eset)
	# keep only the ER+ and ER-
	samples <- phenoData$sample[grep("ER", phenoData$specimen)]
	# subsetting the expressionSet
	eset <- eset[,samples]
	## EXTRACTING CLASS LABELS
	classLabel <- sub("^ER(.*) breast cancer", "\\1", grep("ER", phenoData$specimen, value=T))
	classLabel
	[1] "-" "-" "-" "-" "-" "-" "-" "-" "-" "+" "+" "+" "+" "+" "+" "+" "+" "+"

	## COMPUTING P-VALUE DISTRIBUTION
	minus = which(classLabel=="-")
	plus = which(classLabel=="+")
	p <- apply(e, 1, function(x){t.test(as.numeric(x[minus]), as.numeric(x[plus]))$p.value})
	## LOADING LIBRARIES FOR PARALLEL PROCESSING
	library(doMC)
	ncore = multicore:::detectCores()
	registerDoMC(cores = ncore)

	## COMPUTING THE RANDOM P-VALUE DISTRIBUTION
	# How many random sampling
	R=100
	# Shuffling the sample labels and recomputing the p-value each time
	p.rand <- foreach(i = 1:dim(e)[1], .combine=rbind) %dopar% {