Skip to content

Instantly share code, notes, and snippets.

@chiral
Last active October 31, 2016 09:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chiral/10007361 to your computer and use it in GitHub Desktop.
Save chiral/10007361 to your computer and use it in GitHub Desktop.
Sample program for categorical data analysis, written for a Kaggle challenge.
library(dplyr)
library(ggplot2)
library(reshape2)
# Load the training data, then keep columns A through G of the rows whose
# record_type equals 1.
train <- read.csv("train.csv")
#test <- read.csv("test_v2.csv")
# `%.%` was dplyr's original chaining operator and is no longer provided by
# the package; `%>%` is its replacement.
az <- train %>%
  filter(record_type == 1) %>%
  select(A:G)
#' Cramer's V (coefficient of association) for a contingency table.
#'
#' Computes phi^2 = sum(n_ij^2 / (r_i * c_j)) - 1, where r_i and c_j are the
#' row and column totals, and returns sqrt(phi^2 / (min(rows, cols) - 1)).
#'
#' @param dat Numeric matrix (or an object `as.matrix()` can coerce) of
#'   non-negative cell counts.
#' @return A single non-negative number; `NaN` when fewer than two non-empty
#'   rows or columns remain, because the statistic is undefined in that case.
cramer.coe <- function(dat) {
  dat <- as.matrix(dat)
  row.sum <- rowSums(dat)
  col.sum <- colSums(dat)

  # Rows/columns with a zero total would contribute 0/0 terms and turn the
  # whole result into NaN, so drop them before summing.
  dat <- dat[row.sum > 0, col.sum > 0, drop = FALSE]
  row.sum <- row.sum[row.sum > 0]
  col.sum <- col.sum[col.sum > 0]

  # Normalise by the smaller dimension: this makes the coefficient symmetric
  # under transposition and lets it reach 1 for non-square tables.
  k <- min(nrow(dat), ncol(dat))
  if (k < 2) {
    return(NaN)
  }

  phi2 <- sum(dat^2 / outer(row.sum, col.sum)) - 1
  # Rounding can push phi2 marginally below zero for independent tables.
  sqrt(max(phi2, 0) / (k - 1))
}
#' Association coefficient between the variables named in a casting formula.
#'
#' Cross-tabulates `dat` with `dcast()`, using `length` as the aggregator so
#' each cell holds a row count, and passes the count matrix to `cramer.coe()`.
#'
#' @param dat Data frame holding the variables referenced by `form`.
#' @param form Casting formula, e.g. `A ~ B`.
#' @return Whatever `cramer.coe()` returns for the resulting count matrix.
dcast.coe <- function(dat, form) {
  # `length` only counts rows per cell, so any column can act as the value
  # column. It is passed by name rather than by index because dcast() expects
  # value.var to be a column name.
  d <- dcast(dat, form, length, value.var = names(dat)[1])
  # Everything after the first column is treated as counts (this assumes a
  # single left-hand-side variable in `form`). drop = FALSE keeps a
  # one-column result two-dimensional.
  counts <- as.matrix(d[, -1, drop = FALSE])
  d.m <- matrix(as.double(counts), nrow(d), ncol(d) - 1)
  cramer.coe(d.m)
}
# Build a long-format table with one row per ordered pair of columns of `az`
# and the association coefficient for that pair (NA when both are the same).
cols1 <- names(az)
cols2 <- names(az)
# expand.grid() varies its first argument fastest, so listing c2 first keeps
# the rows in the order of a nested loop with c1 as the outer variable.
df <- expand.grid(c2 = cols2, c1 = cols1, stringsAsFactors = FALSE)
df <- df[, c("c1", "c2")]
# Fill the coefficient column in one pass instead of growing the data frame
# with rbind() on every iteration.
df$coe <- vapply(
  seq_len(nrow(df)),
  function(i) {
    lhs <- df$c1[i]
    rhs <- df$c2[i]
    if (lhs == rhs) {
      return(NA_real_)
    }
    dcast.coe(az, as.formula(paste(lhs, "~", rhs)))
  },
  numeric(1)
)
# Heat map of the pairwise coefficients: one white-bordered tile per
# (c1, c2) pair, filled on a white-to-steelblue gradient by `coe`.
p <- ggplot(df, aes(x = c1, y = c2)) +
  geom_tile(aes(fill = coe), color = "white") +
  scale_fill_gradient(low = "white", high = "steelblue")
print(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment