Pavel Paramonov multidis

## ggplot_legend_tweaks.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                multidis
                / ggplot_legend_tweaks.md
            
            
              Created
              December 13, 2013 21:00
            
              
                Tweaking legends in ggplot2.
              
          
    Fairly comprehensive howto on R cookbook page:
http://www.cookbook-r.com/Graphs/Legends_(ggplot2)/

  
## ggplot_pie_chart.r
## Pie chart of the education variable in dfnum: a single stacked bar
## (constant x, filled by education level) wrapped into polar coordinates.
gp <- ggplot(dfnum, aes(x=factor("education"), fill=factor(education)))
gp <- gp + geom_bar(width=1) + coord_polar(theta="y")
## tweak legend: empty title, explicit breaks and labels
## NOTE(review): the breaks assume education is coded as 0, 0.2, ..., 1 and
## that ans.educ[,2] holds the six matching labels -- confirm against the data.
gp <- gp + scale_fill_discrete(name="",
                               breaks=as.character(seq(0, 1, length.out=6)),
                               labels=ans.educ[,2])
print(gp)

## tweak legend with colormap (color palette) modification
pie <- pie + scale_fill_brewer(palette="Set1",

## caret_install.r
## install caret together with the packages listed in its Depends and Suggests fields
install.packages("caret", dependencies = c("Depends", "Suggests"))

## bioconductor_install_update.r
## Load the Bioconductor installer script, which defines biocLite().
## Fetched over HTTPS because source() executes whatever the server returns.
## NOTE(review): biocLite() was replaced by BiocManager::install() starting
## with Bioconductor 3.8 -- confirm which release this script has to support.
source("https://bioconductor.org/biocLite.R")

## install core packages or get list of updates
biocLite()

## install specific packages by name
biocLite(c("pkg1", "pkg2"))

## if several Bioconductor release versions coexist: upgrade
biocLite("BiocUpgrade")

## 10_caret_multi_methods_class.r
library(caret)

## Evaluating variable subset with various classification models (caret unified func. call).
## Repeated CV-resampling is used for parameter tuning
##  unless some heuristics are used by caret (specific to each learning method).
varsubs.miscclass.eval <- function(vset, Xtra, Yfac, Xtst, Ytst, methods.caret,
                                   tune.grid.Npts=5, cv.rep=5) {
  if (!all(vset %in% colnames(Xtra))) {
    stop("varsubs.miscclass.eval error: Some supplied variable names are not present among the covariate matrix column names.")
  }

## list_as_fun_args.r
## regular case: every call spells out its own arguments
foo <- function(a, b, c) {
  ## does something
  a + b - c
}
foo2 <- function(b, c) {
  ## also some function
  b + c
}
foo(a = 1, b = 2, c = 5)
foo2(b = 2, c = 5) ## the same b/c pair has to be typed again

## passing a list: keep the shared arguments in one place and let
## do.call() spread them over the function's parameters
arg.list <- list(b = 2, c = 5)
do.call(foo, c(list(a = 1), arg.list))
do.call(foo2, arg.list)

## multicore_detect_cores.r
#library(multicore) ## no longer needed as of R 3.0
library(parallel)
## Number of CPU cores reported for this machine.
## detectCores() returns NA when the count cannot be determined; fall back
## to a single core so ncores can always be used as a worker count.
ncores <- detectCores()
if (is.na(ncores)) {
  ncores <- 1L
}

## sensitivity_specificity_ROC.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                multidis
                / sensitivity_specificity_ROC.md
            
            
              Created
              December 18, 2013 21:04
            
              
                Sensitivity, specificity, and ROC. Simple, but I end up looking them up every time to verify, hence this gist.
              
          
    Sensitivity and specificity in binary classification

Sensitivity: given that a result is truly an event, probability of predicting that event correctly.
Medical: fraction of correctly identified disease cases among all disease cases, TP / (TP + FN). With a highly sensitive test, the patient is likely to be healthy if the test is negative. High sensitivity helps avoid interventions done to healthy patients.
Specificity: given that a result is truly NOT an event, probability of predicting a negative.
Medical: fraction of correctly identified healthy cases among all healthy cases, TN / (TN + FP). With a highly specific test, disease is likely if the test is positive. High specificity is essential for correctly identifying high-risk patients.

  
## split_strat_scale.r
library(caret)

## select training indices preserving class distribution (80% of the rows)
in.train <- createDataPartition(yclass, p = 0.8, list = FALSE)
ytra <- yclass[in.train]
ytst <- yclass[-in.train]

## class counts: full set, then training part, then test part
summary(factor(yclass))
summary(factor(ytra))
summary(factor(ytst))

## standardize the training features (column-wise center and scale);
## scale() keeps the centering/scaling values as attributes of Xtra,
## so the same parameters can be reused for the test part
Xtra <- scale(X[in.train, ])

## dataframe_sel_cols_factor.r
## simply convert selected columns to factors
## (the result is a list with one factor per selected column)
## NOTE(review): the result is stored under the name `c`, which shadows
## base::c as a variable; calls such as c(...) still resolve to the
## function, but consider a more descriptive name.
c <- lapply(a[,3:4], factor)

## build dataframe with some columns as factors
vset.fac <- c("name1","name2") ## names from some original dataframe d0
vset.num <- c("num1","num2")

## drop = FALSE keeps each selection a data frame even when only one
## column name is listed, so column names survive and lapply() still
## iterates over columns rather than over the elements of a bare vector
df <- data.frame(d0[, vset.num, drop = FALSE],
                 lapply(d0[, vset.fac, drop = FALSE], factor))

## check structure
	## Pie chart of the education variable in dfnum: a single stacked bar
	## (constant x, filled by education level) wrapped into polar coordinates.
	gp <- ggplot(dfnum, aes(x=factor("education"), fill=factor(education)))
	gp <- gp + geom_bar(width=1) + coord_polar(theta="y")
	## tweak legend: empty title, explicit breaks and labels
	## NOTE(review): the breaks assume education is coded as 0, 0.2, ..., 1 and
	## that ans.educ[,2] holds the six matching labels -- confirm against the data.
	gp <- gp + scale_fill_discrete(name="",
	                               breaks=as.character(seq(0, 1, length.out=6)),
	                               labels=ans.educ[,2])
	print(gp)

	## tweak legend with colormap (color palette) modification
	pie <- pie + scale_fill_brewer(palette="Set1",
	source("http://bioconductor.org/biocLite.R")

	## install core packages or get list of updates
	biocLite()

	## install specific packages by name
	biocLite(c("pkg1", "pkg2"))

	## if more than one Bioconductor release versions coexist: upgrade
	biocLite("BiocUpgrade")
	library(caret)

	## Evaluating variable subset with various classification models (caret unified func. call).
	## Repeated CV-resampling is used for parameter tuning
	## unless some heuristics are used by caret (specific to each learning method).
	varsubs.miscclass.eval <- function(vset, Xtra, Yfac, Xtst, Ytst, methods.caret,
	tune.grid.Npts=5, cv.rep=5) {
	if (!all(vset %in% colnames(Xtra))) {
	stop("varsubs.miscclass.eval error: Some supplied variable names are not present among the covariate matrix column names.")
	}
	## regular case
	foo <- function(a, b, c) a + b - c ## does something
	foo2 <- function(b, c) b + c ## also some function
	foo(a=1, b=2, c=5)
	foo2(b=2, c=5) ## repeating list of multiple arguments

	## passing a list
	arg.list <- list(b=2, c=5)
	do.call(foo, c(list(a=1), arg.list))
	do.call(foo2, arg.list)
	#library(multicore) ## no longer needed as of R 3.0
	library(parallel)
	ncores <- detectCores()
	library(caret)

	## select training indices preserving class distribution
	in.train <- createDataPartition(yclass, p=0.8, list=FALSE)
	summary(factor(yclass))
	ytra <- yclass[in.train]; summary(factor(ytra))
	ytst <- yclass[-in.train]; summary(factor(ytst))

	## standardize features: training parameters of scaling for test-part
	Xtra <- scale(X[in.train,])
	## simply convert selected columns to factors
	c <- lapply(a[,3:4], factor)

	## build dataframe with some columns as factors
	vset.fac <- c("name1","name2") ## names from some original dataframe d0
	vset.num <- c("num1","num2")

	df <- data.frame(d0[,vset.num], lapply(d0[,vset.fac],factor))

	## check structure