Skip to content

Instantly share code, notes, and snippets.

@rpietro
Created August 2, 2015 11:54
Show Gist options
  • Save rpietro/279d6863356a5168a36a to your computer and use it in GitHub Desktop.
Script from a blog post on classifiers
# http://freakonometrics.hypotheses.org/20002
# Simulate a two-class dataset: each class mean is a parabola in X, with
# Gaussian noise on the response Y. Split into training (rows 1-300) and
# testing (rows 301-500), then plot the training points coloured by class.
n <- 500
set.seed(1)
X <- rnorm(n)
ma <- 10 - (X + 1.5)^2 * 2   # mean curve for class 1
mb <- -10 + (X - 1.5)^2 * 2  # mean curve for class 2
M <- cbind(ma, mb)
set.seed(1)
Z <- sample(1:2, size = n, replace = TRUE)          # class labels
Y <- ma * (Z == 1) + mb * (Z == 2) + rnorm(n) * 5   # noisy response
df <- data.frame(Z = as.factor(Z), X, Y)
df1 <- training <- df[1:300, ]
df2 <- testing <- df[301:500, ]
plot(df1$X, df1$Y, pch = 19,
     col = c(rgb(1, 0, 0, .4), rgb(0, 0, 1, .4))[df1$Z])
# Fit a classification tree on the training set and visualise its decision
# surface over a regular (X, Y) grid, with the training points overlaid.
library(rpart)
fit <- rpart(Z ~ X + Y, data = df1)
# Score at a grid point: predicted probability of class "1" (first column
# of the class-probability matrix returned by predict.rpart).
pred <- function(x, y) predict(fit, newdata = data.frame(X = x, Y = y))[, 1]
vx <- seq(-3, 3, length = 101)
vy <- seq(-25, 25, length = 101)
z <- matrix(NA_real_, length(vx), length(vy))
for (i in seq_along(vx)) {
  for (j in seq_along(vy)) {
    z[i, j] <- pred(vx[i], vy[j])
  }
}
image(vx, vy, z, axes = FALSE, xlab = "", ylab = "")
points(df1$X, df1$Y, pch = 19,
       col = c(rgb(1, 0, 0, .4), rgb(0, 0, 1, .4))[df1$Z])
# ROC curves for the tree: black = out-of-sample (testing),
# grey = in-sample (training).
Y1 <- as.numeric(df1$Z) - 1  # 0/1 labels, training (1 means class "2")
Y2 <- as.numeric(df2$Z) - 1  # 0/1 labels, testing
library(ROCR)
# Score must be the probability of the POSITIVE class. Y* codes class "2"
# as 1, so take column 2 (P(Z = "2")); the original used column 1, which is
# the probability of class "1" and therefore an inverted score.
S1 <- predict(fit, newdata = df1)[, 2]
S2 <- predict(fit, newdata = df2)[, 2]
# Use dedicated names: the original reused `pred`, clobbering the
# prediction function defined just above.
roc_out <- prediction(S2, Y2)
plot(performance(roc_out, "tpr", "fpr"))
roc_in <- prediction(S1, Y1)
plot(performance(roc_in, "tpr", "fpr"), add = TRUE, col = "grey")
# Random forest classifier; score = estimated P(class "2").
library(randomForest)
fit <- randomForest(Z ~ X + Y, data = df1)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y), type = "prob")[, 2]
}
# Logistic regression; type = "response" gives P(class "2") directly
# (glm with a binomial family models the probability of the second level).
fit <- glm(Z ~ X + Y, data = df1, family = binomial)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y), type = "response")
}
library(MASS)
# Linear discriminant analysis. Note: lda() is not a GLM and has no
# `family` argument; the original passed family = binomial, which was
# silently absorbed by `...` and ignored, so it is dropped here.
fit <- lda(Z ~ X + Y, data = df1)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y))$posterior[, 2]
}
# Quadratic discriminant analysis. As with lda(), qda() has no `family`
# argument; the spurious family = binomial from the original is removed.
fit <- qda(Z ~ X + Y, data = df1)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y))$posterior[, 2]
}
# k-nearest-neighbour classifier (k = 9); score = P(class "2").
library(caret)
fit <- knn3(Z ~ X + Y, data = df1, k = 9)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y))[, 2]
}
# Logistic GAM with a bivariate smooth of (X, Y).
library(mgcv)
fit <- gam(Z ~ s(X, Y), data = df1, family = binomial)
pred <- function(x, y) {
  predict(fit, newdata = data.frame(X = x, Y = y), type = "response")
}
# Boosted regression trees (gbm via dismo::gbm.step). gbm needs a 0/1
# numeric response, so add Z01 = 1 for class "2" as column 4 of df1.
library(dismo)
df1$Z01 <- 1 * (df1$Z == "2")
# gbm.x = 2:3 are columns X and Y; gbm.y = 4 is the new Z01 column.
fit <- gbm.step(data = df1, gbm.x = 2:3, gbm.y = 4,
                family = "bernoulli", tree.complexity = 5,
                learning.rate = 0.01, bag.fraction = 0.5)
pred <- function(x, y) {
  # Use the tree count selected by gbm.step's cross-validation (stored in
  # fit$gbm.call$best.trees) instead of a hard-coded 400, which may exceed
  # the number of trees actually fitted.
  predict(fit, newdata = data.frame(X = x, Y = y),
          type = "response", n.trees = fit$gbm.call$best.trees)
}
# Leave-one-out cross-validated scores for the tree, then its ROC curve.
FIT <- vector("list", n)  # preallocate instead of growing a list
for (i in seq_len(n)) {
  FIT[[i]] <- rpart(Z ~ X + Y, data = df[-i, ])
}
# Score for held-out row i: P(class "2") from the model fitted without it.
predict_i <- function(i) predict(FIT[[i]], newdata = df[i, ])[, 2]
S <- vapply(seq_len(n), predict_i, numeric(1))
# NOTE(review): this overwrites the simulated response vector Y from the
# data-generation step with 0/1 class labels (kept for compatibility with
# the original script).
Y <- as.numeric(df$Z) - 1
library(ROCR)
pred <- prediction(S, Y)
perf <- performance(pred, "tpr", "fpr")
plot(perf)
# LOOCV scores for the random forest (refits n models: slow).
FIT <- vector("list", n)
for (i in seq_len(n)) {
  FIT[[i]] <- randomForest(Z ~ X + Y, data = df[-i, ])
}
predict_i <- function(i) {
  predict(FIT[[i]], newdata = df[i, ], type = "prob")[, 2]
}
S <- vapply(seq_len(n), predict_i, numeric(1))
# LOOCV scores for logistic regression.
FIT <- vector("list", n)
for (i in seq_len(n)) {
  FIT[[i]] <- glm(Z ~ X + Y, data = df[-i, ], family = binomial)
}
predict_i <- function(i) {
  predict(FIT[[i]], newdata = df[i, ], type = "response")
}
S <- vapply(seq_len(n), predict_i, numeric(1))
# LOOCV scores for LDA. The spurious family = binomial from the original
# is dropped (lda() has no such argument; it was silently ignored).
FIT <- vector("list", n)
for (i in seq_len(n)) {
  FIT[[i]] <- lda(Z ~ X + Y, data = df[-i, ])
}
predict_i <- function(i) {
  predict(FIT[[i]], newdata = df[i, ])$posterior[, 2]
}
S <- vapply(seq_len(n), predict_i, numeric(1))
# LOOCV scores for QDA (spurious family = binomial removed, as for lda).
FIT <- vector("list", n)
for (i in seq_len(n)) {
  FIT[[i]] <- qda(Z ~ X + Y, data = df[-i, ])
}
predict_i <- function(i) {
  predict(FIT[[i]], newdata = df[i, ])$posterior[, 2]
}
S <- vapply(seq_len(n), predict_i, numeric(1))
# LOOCV scores for 5-nearest-neighbours.
FIT <- vector("list", n)
for (i in seq_len(n)) {
  FIT[[i]] <- knn3(Z ~ X + Y, data = df[-i, ], k = 5)
}
predict_i <- function(i) predict(FIT[[i]], newdata = df[i, ])[, 2]
S <- vapply(seq_len(n), predict_i, numeric(1))
# LOOCV scores for the boosted trees (refits gbm n times: very slow).
# Bug fix: gbm.y = 4 requires a 4th column in df, but Z01 was only ever
# added to df1 in the original script — add it to df here.
df$Z01 <- 1 * (df$Z == "2")
VS <- rep(NA_real_, n)
for (i in seq_len(n)) {
  FIT <- gbm.step(data = df[-i, ],
                  gbm.x = 2:3, gbm.y = 4, family = "bernoulli",
                  tree.complexity = 5, learning.rate = 0.01,
                  bag.fraction = 0.5)
  # Predict on the probability scale (type = "response"), consistent with
  # the other models, using the CV-selected tree count rather than a
  # hard-coded 400.
  VS[i] <- predict(FIT, newdata = df[i, ],
                   type = "response", n.trees = FIT$gbm.call$best.trees)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment