Skip to content

Instantly share code, notes, and snippets.

@carbocation
Created January 28, 2013 04:15
Show Gist options
  • Save carbocation/4652968 to your computer and use it in GitHub Desktop.
Save carbocation/4652968 to your computer and use it in GitHub Desktop.
An examination of NYC teacher rating data
path <- "/Users/jpirruccello/Downloads"
library(ggplot2)
library(mboost)

# Read the two tab-delimited teacher data files (2008-09 and 2009-10 releases).
y09 <- read.csv(file.path(path, "TDI_20082009_FOIL_Press_JP.txt"), sep = "\t")
y10 <- read.csv(file.path(path, "TDI_20092010_FOIL_Press_JP.txt"), sep = "\t")

# A teacher record is matched across years on subject, grade and the first
# teacher's first/last name. Columns present in both files but not used as
# keys receive the ".x" (2008-09 file) and ".y" (2009-10 file) suffixes.
identifiers <- c(
  "subject", "grade",
  "teacher_name_first_1", "teacher_name_last_1"
)
df <- merge(y09, y10, by = identifiers)

# Keep solo-taught 4th grade rows only: the second teacher's first name must
# be empty in both years (i.e. no co-teaching) and the grade must be 4th.
df <- subset(
  df,
  teacher_name_first_2.x == "" &
    teacher_name_first_2.y == "" &
    grade == "4th Grade"
)

# Year-over-year change in score: 2009-10 value minus 2008-09 value.
scores <- c("va_0809.x", "va_0910")
df$va_diff_09_10 <- df[[scores[2]]] - df[[scores[1]]]
# Build a per-row comparison of a model's predictions against the observed
# 2009-10 score.
#
# Arguments:
#   model - a fitted model object with a predict() method accepting `newdata`
#   data  - data frame containing the predictors and the `va_0910` column
# Returns a data frame with columns:
#   pred   - prediction for each row of `data`
#   real   - observed `va_0910`
#   diff   - residual, real - pred
#   diffsq - squared residual
# Missing values are not removed, so an NA prediction propagates into the
# column sums computed from this table.
residual_table <- function(model, data) {
  out <- data.frame(
    pred = predict(model, newdata = data),
    real = data$va_0910
  )
  out$diff <- out$real - out$pred
  out$diffsq <- out$diff^2
  out
}

# Model 1: Gaussian linear regression (glm's default family) of the 2009-10
# score on the 2008-09 score.
model <- glm(va_0910 ~ va_0809.x, data = df)
x <- residual_table(model, df)
# Column totals; the `diffsq` total is the in-sample sum of squared errors.
colSums(x)

# Model 2: boosted additive model with P-spline base-learners on the prior
# score, n_0910 and pretest_0910, run for 500 boosting iterations.
m1 <- gamboost(
  va_0910 ~ bbs(va_0809.x) + bbs(n_0910) + bbs(pretest_0910),
  data = df,
  control = boost_control(mstop = 500)
)
x1 <- residual_table(m1, df)
colSums(x1)

# Model 3: same as model 2 with predicted_0910 added as a further base-learner.
m2 <- gamboost(
  va_0910 ~ bbs(va_0809.x) + bbs(n_0910) + bbs(pretest_0910) +
    bbs(predicted_0910),
  data = df,
  control = boost_control(mstop = 500)
)
x2 <- residual_table(m2, df)
colSums(x2)
# All plots map bare column names inside aes() so that values are looked up in
# the `data` argument; writing df$col there bypasses the plot data and leaks
# "df$..." into the axis labels.

# Contour lines of the 2D density of 2008-09 vs. 2009-10 scores.
ggplot(df, aes(x = va_0809.x, y = va_0910)) +
  geom_density2d()

# Density drawn as tiles whose transparency follows the density level, with
# the raw observations overlaid as very faint red points.
# From http://stackoverflow.com/questions/7073315/how-do-i-create-a-continuous-density-heatmap-of-2d-scatter-data-in-r
ggplot(df, aes(x = va_0809.x, y = va_0910)) +
  stat_density2d(aes(alpha = ..level..), geom = "tile") +
  scale_alpha_continuous(limits = c(0, 0.2), breaks = seq(0, 0.2, by = 0.025)) +
  geom_point(colour = "red", alpha = 0.01) +
  theme_bw()

# Density drawn as filled polygons, coloured from blue (low level) to green
# (high level).
ggplot(df, aes(x = va_0809.x, y = va_0910)) +
  stat_density2d(aes(fill = ..level..), geom = "polygon") +
  scale_fill_gradient(low = "blue", high = "green")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment