Skip to content

Instantly share code, notes, and snippets.

@chiral

chiral/prediction1.R

Last active Oct 31, 2016
Embed
What would you like to do?
Sample program for categorical data analysis, written for a Kaggle challenge.
library(dplyr)
library(ggplot2)
library(reshape2)
# Load the training data from the working directory.
train <- read.csv("train.csv")
#test <- read.csv("test_v2.csv")

# Keep only the rows with record_type == 1 and the columns A through G.
# `%>%` replaces the original `%.%` operator, which dplyr deprecated early on
# and no longer provides.
az <- train %>%
  filter(record_type == 1) %>%
  select(A:G)
# Cramer's V (coefficient of association) for a two-way contingency table.
#
# Args:
#   dat: a numeric matrix (or all-numeric data frame) of cell counts, with
#        one row per level of the first variable and one column per level
#        of the second variable.
#
# Returns:
#   A single number, sqrt(phi^2 / (min(rows, cols) - 1)). The value does not
#   depend on the orientation of the table. NA is returned when the table has
#   fewer than two rows or fewer than two columns, where V is undefined.
#   A row or column that sums to zero yields NaN (0 / 0 in that cell).
cramer.coe <- function(dat) {
  counts <- as.matrix(dat)
  row.sum <- rowSums(counts)
  col.sum <- colSums(counts)

  min.dim <- min(nrow(counts), ncol(counts))
  if (min.dim < 2) {
    return(NA_real_)
  }

  # phi^2 = chi^2 / N = sum(n_ij^2 / (n_i. * n_.j)) - 1; outer() builds the
  # full matrix of row-total * column-total products in one step.
  phi.sq <- sum(counts^2 / outer(row.sum, col.sum)) - 1

  # Round-off can push phi^2 marginally below zero for independent tables;
  # clamp so sqrt() does not produce NaN with a warning.
  sqrt(max(phi.sq, 0) / (min.dim - 1))
}
# Cross-tabulate `dat` according to the formula `form` (cell value = number of
# rows falling in that cell) and return cramer.coe() of the resulting table.
#
# Args:
#   dat:  data frame holding the variables named in `form`.
#   form: casting formula passed to dcast(), e.g. A ~ B.
dcast.coe <- function(dat, form) {
  wide <- dcast(dat, form, length, value.var = 1)
  n.rows <- nrow(wide)
  n.cols <- ncol(wide) - 1
  # The first column of the cast result holds the row labels; only the
  # remaining columns are counts. Flatten them to doubles and rebuild a plain
  # matrix without dimnames.
  cells <- as.double(as.matrix(wide[, 2:ncol(wide)]))
  count.matrix <- matrix(cells, n.rows, n.cols)
  cramer.coe(count.matrix)
}
# Build a long table with one row per ordered pair of columns of `az` and the
# association coefficient between them (NA on the diagonal).
cols1 <- names(az)
cols2 <- names(az)

# expand.grid() varies its first argument fastest, so listing c2 first keeps
# the row order of a nested loop with c1 outside and c2 inside.
pairs <- expand.grid(c2 = cols2, c1 = cols1, stringsAsFactors = FALSE)

# Compute every coefficient in one pass instead of growing a data frame with
# rbind() inside a loop; vapply() guarantees one numeric value per pair.
pair.coe <- vapply(
  seq_len(nrow(pairs)),
  function(k) {
    c1 <- pairs$c1[k]
    c2 <- pairs$c2[k]
    if (c1 == c2) {
      NA_real_
    } else {
      dcast.coe(az, as.formula(paste(c1, "~", c2)))
    }
  },
  numeric(1)
)

df <- data.frame(
  c1 = pairs$c1,
  c2 = pairs$c2,
  coe = pair.coe,
  stringsAsFactors = FALSE
)
# Heatmap of the pairwise coefficients: one tile per (c1, c2) pair, shaded
# from white (low) to steel blue (high), with white tile borders.
p <- ggplot(df, aes(c1, c2)) +
  geom_tile(aes(fill = coe), colour = "white") +
  scale_fill_gradient(low = "white", high = "steelblue")
print(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.