Skip to content

Instantly share code, notes, and snippets.

View stephenturner's full-sized avatar

Stephen Turner stephenturner

View GitHub Profile
@stephenturner
stephenturner / plink_commands.txt
Created November 16, 2010 21:42
plink_commands.txt
@stephenturner
stephenturner / parallelize_plink_ibd.pl
Created November 16, 2010 21:50
parallelize_plink_ibdibd.pl
@stephenturner
stephenturner / submit_ibd_pbs.pl
Created November 16, 2010 22:01
submit_ibd_pbs.pl
#!/usr/bin/perl
use strict;
if(scalar(@ARGV) != 1){
print "submit_ibd_pbs.pl <num_pbs_to_submit>\n";
exit;
}
my $LIMIT = $ARGV[0];
@stephenturner
stephenturner / qqbase.r
Created November 17, 2010 15:29
qqbase.r
# Originally posted at http://gettinggeneticsdone.blogspot.com/2010/07/qq-plots-of-p-values-in-r-using-base.html
# Define the function
ggd.qqplot = function(pvector, main=NULL, ...) {
o = -log10(sort(pvector,decreasing=F))
e = -log10( 1:length(o)/length(o) )
plot(e,o,pch=19,cex=1, main=main, ...,
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p))),
xlim=c(0,max(e)), ylim=c(0,max(o)))
@stephenturner
stephenturner / logisticcurve.r
Created November 23, 2010 21:18
logisticcurve.r
x=seq(-5,5,.01)
invlogit=function(x) exp(x)/(1+exp(x))
y=invlogit(x)
plot(x,y,pch=16,ylab=expression(paste(logit^{-1},(x))))
abline(v=0)
abline(h=.5)
text(.55,.55,expression(paste("Slope is ",beta/4)),adj=c(0,0))
@stephenturner
stephenturner / randomforestdemo.r
Created February 15, 2011 22:53
randomforestdemo.r
rm(list=ls(all=TRUE))
library(randomForest)
###############Classification################
data(iris)
head(iris)
iris.rf <- randomForest(Species~., data=iris, importance=T, proximity=T)
iris.rf.subset <- randomForest(Species~., data=iris[c(1:3,5)], importance=T, proximity=T)
iris.rf.subset2 <- randomForest(Species~. -Petal.Length -Petal.Width, data=iris, importance=T, proximity=T)
print(iris.rf)
@stephenturner
stephenturner / 2011-02-15 rf adiposity.r
Created February 16, 2011 00:59
2011-02-15 rf adiposity.r
library(randomForest)
###############################################################################
############################## load functions #################################
###############################################################################
# need to document this!
rfr2 = function(randomForestModel) {
printoutput = capture.output(print(randomForestModel))
varline = grep("explained",printoutput,value=TRUE)
@stephenturner
stephenturner / permute_column.r
Created February 17, 2011 21:21
permute_column.r
# permutes a column in a data.frame, sets seed optionally
permute <- function (dataframe, columnToPermute="column", seed=NULL) {
if (!is.null(seed)) set.seed(seed)
colindex <- which(names(dataframe)==columnToPermute)
permutedcol <- dataframe[ ,colindex][sample(1:nrow(dataframe))]
dataframe[colindex] <- permutedcol
return(dataframe)
}
@stephenturner
stephenturner / splitdf.randomize.r
Created February 19, 2011 02:01
splitdf.randomize.r
#splitdf splits a data frame into a training and testing set.
#returns a list of two data frames: trainset and testset.
#you can optionally apply a random seed.
splitdf <- function(dataframe, seed=NULL, trainfrac=0.5) {
if (trainfrac<=0 | trainfrac>=1) stop("Training fraction must be between 0 and 1, not inclusive")
if (!is.null(seed)) set.seed(seed)
index <- 1:nrow(dataframe)
trainindex <- sample(index, trunc(length(index)/(1/trainfrac)))
trainset <- dataframe[trainindex, ]
testset <- dataframe[-trainindex, ]
@stephenturner
stephenturner / RF vs lm and FS 1.r
Created March 1, 2011 19:31
RF vs lm and FS 1.r
> #First define the function
> rsq <- function (yhat, y) 1 - sum((y - yhat)^2)/sum((y - mean(y))^2)
>
> # first, fit a stepwise model and test it on new data
> totfat1.intonly <- lm(CRC_FAT_TOT~1, data=totfat1)
> totfat1.full <- lm(CRC_FAT_TOT~., data=totfat1)
> totfat1.step <- step(totfat1.intonly, scope=list(upper=totfat1.full), trace=0)
> rsq(predict(totfat1.step,totfat2), totfat2$CRC_FAT_TOT)
[1] 0.6991431
>