Create a gist now

Instantly share code, notes, and snippets.

Generate integer-encoded categoricals
## Generate integer-encoded categoricals.
##
## For each training-set size listed in the loop, read train-<SIZE>m.csv and
## test.csv, replace the categorical columns with zero-based integer codes and
## the Y/N label with 1/0, then write the header-less, comma-separated files
## train-intcateg-<SIZE>m.csv and test-intcateg-<SIZE>m.csv.
for SIZE in 1; do
# The heredoc delimiter is intentionally unquoted: the shell substitutes
# ${SIZE} into the R program text before R reads it from stdin.
time R --vanilla --quiet << EOF
library(data.table)
train <- as.data.frame(fread("train-${SIZE}m.csv"))
test <- as.data.frame(fread("test.csv"))
n_train <- nrow(train)

# Encode train and test together so a given category value receives the same
# integer code in both output files.
d <- rbind(train, test)
for (k in c("Month","DayofMonth","DayOfWeek","UniqueCarrier","Origin","Dest")) {
  # Factor codes start at 1; shift to zero-based. Integer (not double) storage
  # keeps write.table from emitting large codes in scientific notation.
  d[, k] <- as.integer(as.factor(d[, k])) - 1L
}
d[["dep_delayed_15min"]] <- ifelse(d[["dep_delayed_15min"]] == "Y", 1L, 0L)

# Split back by position with a logical mask. Unlike 1:n style ranges, this
# yields zero rows (rather than a reversed index) when either input is empty.
is_train <- seq_len(nrow(d)) <= n_train
write.table(d[is_train, , drop = FALSE], "train-intcateg-${SIZE}m.csv", row.names=FALSE, col.names=FALSE, sep=",")
write.table(d[!is_train, , drop = FALSE], "test-intcateg-${SIZE}m.csv", row.names=FALSE, col.names=FALSE, sep=",")
EOF
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment