Skip to content

Instantly share code, notes, and snippets.

View stephenturner's full-sized avatar

Stephen Turner stephenturner

View GitHub Profile
@stephenturner
stephenturner / qqbase.r
Created November 17, 2010 15:29
qqbase.r
# Originally posted at http://gettinggeneticsdone.blogspot.com/2010/07/qq-plots-of-p-values-in-r-using-base.html
# Define the function
ggd.qqplot = function(pvector, main=NULL, ...) {
o = -log10(sort(pvector,decreasing=F))
e = -log10( 1:length(o)/length(o) )
plot(e,o,pch=19,cex=1, main=main, ...,
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p))),
xlim=c(0,max(e)), ylim=c(0,max(o)))
@stephenturner
stephenturner / logisticcurve.r
Created November 23, 2010 21:18
logisticcurve.r
x=seq(-5,5,.01)
invlogit=function(x) exp(x)/(1+exp(x))
y=invlogit(x)
plot(x,y,pch=16,ylab=expression(paste(logit^{-1},(x))))
abline(v=0)
abline(h=.5)
text(.55,.55,expression(paste("Slope is ",beta/4)),adj=c(0,0))
@stephenturner
stephenturner / pvalue-from-lm-object.r
Created November 30, 2010 17:46
pvalue-from-lm-object.r
# Function to extract the overall ANOVA p-value out of a linear model object
lmp <- function (modelobject) {
if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
f <- summary(modelobject)$fstatistic
p <- pf(f[1],f[2],f[3],lower.tail=F)
attributes(p) <- NULL
return(p)
}
# simulate some data
@stephenturner
stephenturner / forestplot.r
Created February 11, 2011 02:59
forestplot.r
# d is a data frame with 4 columns
# d$x gives variable names
# d$y gives center point
# d$ylo gives lower limits
# d$yhi gives upper limits
forestplot <- function(d, xlab="Odds Ratio", ylab="Study"){
require(ggplot2)
p <- ggplot(d, aes(x=x, y=y, ymin=ylo, ymax=yhi)) +
geom_pointrange() +
coord_flip() +
@stephenturner
stephenturner / randomforestdemo.r
Created February 15, 2011 22:53
randomforestdemo.r
rm(list=ls(all=TRUE))
library(randomForest)
###############Classification################
data(iris)
head(iris)
iris.rf <- randomForest(Species~., data=iris, importance=T, proximity=T)
iris.rf.subset <- randomForest(Species~., data=iris[c(1:3,5)], importance=T, proximity=T)
iris.rf.subset2 <- randomForest(Species~. -Petal.Length -Petal.Width, data=iris, importance=T, proximity=T)
print(iris.rf)
@stephenturner
stephenturner / 2011-02-15 rf adiposity.r
Created February 16, 2011 00:59
2011-02-15 rf adiposity.r
library(randomForest)
###############################################################################
############################## load functions #################################
###############################################################################
# need to document this!
rfr2 = function(randomForestModel) {
printoutput = capture.output(print(randomForestModel))
varline = grep("explained",printoutput,value=TRUE)
@stephenturner
stephenturner / permute_column.r
Created February 17, 2011 21:21
permute_column.r
# permutes a column in a data.frame, sets seed optionally
permute <- function (dataframe, columnToPermute="column", seed=NULL) {
if (!is.null(seed)) set.seed(seed)
colindex <- which(names(dataframe)==columnToPermute)
permutedcol <- dataframe[ ,colindex][sample(1:nrow(dataframe))]
dataframe[colindex] <- permutedcol
return(dataframe)
}
@stephenturner
stephenturner / ggd_rf_example.r
Created February 18, 2011 21:56
ggd_rf_example.r
#load the iris data
data(iris)
# this data has 150 rows
nrow(iris)
# look at the first few
head(iris)
# splitdf function will return a list of training and testing sets
@stephenturner
stephenturner / splitdf.randomize.r
Created February 19, 2011 02:01
splitdf.randomize.r
#splitdf splits a data frame into a training and testing set.
#returns a list of two data frames: trainset and testset.
#you can optionally apply a random seed.
splitdf <- function(dataframe, seed=NULL, trainfrac=0.5) {
if (trainfrac<=0 | trainfrac>=1) stop("Training fraction must be between 0 and 1, not inclusive")
if (!is.null(seed)) set.seed(seed)
index <- 1:nrow(dataframe)
trainindex <- sample(index, trunc(length(index)/(1/trainfrac)))
trainset <- dataframe[trainindex, ]
testset <- dataframe[-trainindex, ]
@stephenturner
stephenturner / propmiss.r
Created February 24, 2011 03:09
propmiss.r
propmiss <- function(dataframe) {
m <- sapply(dataframe, function(x) {
data.frame(
nmiss=sum(is.na(x)),
n=length(x),
propmiss=sum(is.na(x))/length(x)
)
})
d <- data.frame(t(m))
d <- sapply(d, unlist)