Skip to content

Instantly share code, notes, and snippets.

@dgrapov
Last active March 2, 2018 15:31
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgrapov/5384152 to your computer and use it in GitHub Desktop.
Save dgrapov/5384152 to your computer and use it in GitHub Desktop.
#needed packages
library(andrews)
library(ggplot2)
#get some data here I use "mtcars"
data<-mtcars
fct<-as.factor(mtcars$cyl) # factor for intial grouping
#get andrews encoding (x and y coords)
make.andrews<-function (df, type = 1, clr = NULL, step = 100, ymax = 10,...)
{
#add objects to store x and y
storelist<-list()
if (step < 1)
step <- 100
n <- dim(df)[1]
m <- dim(df)[2]
#plot.new()
if ((type == 1) | (type == 2) | (type == 4)) {
xmin <- (-pi)
xmax <- pi
}
else if (type == 3) {
xmin <- 0
xmax <- 4 * pi
}
# plot.window(c(xmin, xmax), c(-ymax, ymax))
# title(main = main, sub = sub)
# axis(1)
# axis(2)
# box()
# lines(c(xmin, xmax), c(0, 0))
for (i in 1:m) df[, i] <- normalize(df[, i])
clx <- rep(1, n)
if (!is.null(clr)) {
if (!is.numeric(df[, clr])) {
for (i in 1:n) {
for (a in 1:nlevels(df[, clr])) if (levels(df[,
clr])[a] == df[i, clr])
clx[i] <- rainbow(nlevels(df[, clr]))[a]
}
}
else {
for (i in 1:n) clx[i] <- hsv(0, 1, df[i, clr])
}
}
dfm <- numarray(df)
m <- dim(dfm)[2]
coorx <- 0:step
for (p in 0:step) coorx[p + 1] <- (xmin + (xmax - xmin)/step *
p)
coory <- 0:step
for (i in 1:n) {
for (p in 0:step) {
coory[p + 1] <- 0
tt <- (xmin + (xmax - xmin)/step * p)
for (a in 1:m) {
if (type == 1) {
if (a == 1) {
coory[p + 1] <- dfm[i, a]/(2^0.5)
}
else {
cnst <- (a - 2)%/%2 + 1
if ((a - 2)%%2 == 0)
coory[p + 1] <- coory[p + 1] + dfm[i, a] *
sin(cnst * tt)
else coory[p + 1] <- coory[p + 1] + dfm[i,
a] * cos(cnst * tt)
}
}
else if (type == 2) {
cnst <- (a - 1)%/%2 + 1
if ((a - 1)%%2 == 0)
coory[p + 1] <- coory[p + 1] + dfm[i, a] *
sin(cnst * tt)
else coory[p + 1] <- coory[p + 1] + dfm[i,
a] * cos(cnst * tt)
}
else if (type == 3) {
coory[p + 1] <- coory[p + 1] + dfm[i, a] *
cos((a * tt)^0.5)
}
else if (type == 4) {
if (a == 1) {
coory[p + 1] <- dfm[i, a]
}
else {
cnst <- (a - 2)%/%2 + 1
if ((a - 2)%%2 == 0)
coory[p + 1] <- coory[p + 1] + dfm[i, a] *
(sin(cnst * tt) + cos(cnst * tt))
else coory[p + 1] <- coory[p + 1] + dfm[i,
a] * (sin(cnst * tt) - cos(cnst * tt))
}
coory[p + 1] <- coory[p + 1]/(2^0.5)
}
}
}
#lines(coorx, coory, col = clx[i])
storelist[[i]]<-data.frame(x=coorx, y=coory)
}
return(storelist)
}
#plot
plot.data<-function(data,meta.data = NULL, kind="andrews",...){
#change plotting and returned object using kind
#kind = c("andrews-polygon","andrews-line","PCA","andrews-PCA")
# meta data will be used to coor groups
# can be multi column data frame, in this case it is collapsed to single factor
#plotting theme for convenience (base'ish)
.theme2<- theme(
axis.line = element_line(colour = 'gray', size = .75),
panel.background = element_blank(),
plot.background = element_blank()
)
#meta data needs to be separated from numeric
num.id<-c(1:ncol(data))[sapply(data,is.numeric)] # numeric columns
df<-data[,num.id]
if(is.null(meta.data)){
meta<-data[,-(num.id),drop=FALSE] # look for factors
if(length(meta)==0) {meta<-data.frame(rep(1,nrow(data)))} # fix emptyness
} else {
meta<-as.data.frame(meta.data)
}
#need to combine meta to 1 column
if(ncol(meta)>1) {
meta<-data.frame(sapply(1:nrow(meta),function(i) {
paste(as.character(unlist(meta[i,,drop=FALSE])),collapse = "|")
}))
}
if(kind == "pairs"){
meta<-as.factor(as.character(unlist(meta)))
.rainbow<-function(x,alpha=NULL,...){
if(is.null(alpha)){alpha<-1}
rainbow(nlevels(x),alpha=alpha)[x]
}
color<-.rainbow(meta,...)
plot(df,bg=color,col=color,...)
}
if(kind == "andrews-line"){
a.data<-make.andrews(df,...) # makes list of x and y for each row
#need to add meta data cols to x and y to keep track
tmp<-lapply(1:length(a.data),function(i){
cbind(a.data[[i]],meta=factor(meta[i,],levels=levels(as.factor(meta[,1]))),row.id=i)
})
obj<-do.call("rbind",tmp)
#plot
#intersting to change line width
p<-ggplot(data=obj, aes(x=x, y=y,group=row.id,color=meta)) + #change group = meta for an interesting effct
geom_line(...) + #geom_point(,...) +
.theme2#
print(p)
return(obj)
}
if(kind == "andrews-polygon"){
a.data<-make.andrews(df,...) # makes list of x and y for each row
#need to add meta data cols to x and y to keep track
tmp<-lapply(1:length(a.data),function(i){
cbind(a.data[[i]],meta=factor(meta[i,],levels=levels(as.factor(meta[,1]))),row.id=i)
})
obj<-do.call("rbind",tmp)
#plot
#intersting to change line width
p<-ggplot(data=obj, aes(x=x, y=y,group=meta,color=meta)) + #change group = meta for an interesting effct
geom_line(size=4,aes(color=meta),...) + geom_point(,...) +
.theme2#
print(p)
return(obj)
}
#PCA scores of andrews encoded data?? does the encoding help? define help?
# need to consider how to combine x an y
# could be a combination or relationship between
# try separeted
if(kind == "andrews-PCA"){
a.data<-make.andrews(df,...) # makes list of x and y for each row
#need to make vector combine x and y
tmp<-lapply(1:length(a.data),function(i){
c(a.data[[i]][,1],a.data[[i]][,2])
})
obj<-do.call("rbind",tmp)
#make PCA scores
#scaling changes effects so don't
full<-prcomp(obj)
scores<-data.frame(full$x, meta=as.factor(meta[,1]))
#plot
p<-ggplot(data=scores, aes(x=PC1, y=PC2 ,group=meta,color=meta)) +
geom_point(...) +
.theme2
print(p)
return(full)
}
if(kind == "PCA"){
#plot PCA scores
full<-prcomp(df)
scores<-data.frame(full$x, meta=as.factor(meta[,1]))
#intersting to change line width
p<-ggplot(data=scores, aes(x=PC1, y=PC2 ,group=meta,color=meta)) +
geom_point(...) +
.theme2
print(p)
return(full)
}
}
#pairs plot for the first 5 columns coloring on number of cylinders
x11() # turn these off to prevent making new plotting device
plot.data(data,kind="pairs",meta.data=fct, cex=1, alpha=.5, pch=21)
#make classical andrews plot
x11()
andrews(cbind(fct,data),type=1,clr=1,ymax=4.25) # add cyl as factor to allow coloring
# factor for legend (colors)
legend("topleft",legend=levels(fct), fill=rainbow(length(levels(as.factor(mtcars$cyl)))) )
#alteranative visualization of andrews plot
library(ggplot2)
x11()
plot.data(data,kind="andrews-polygon",meta.data=fct,type=1, size=1, alpha=.5) #andrews
#make PCA
x11()
plot.data(data,kind="PCA",meta.data=mtcars$cyl, size=5, alpha=.75)
#make pca from andrews encoded data
x11()
plot.data(data,kind="andrews-PCA",meta.data=fct, size=5, alpha=.75)
#note intersting groups structure not explained by cyl
# after some cheking find that this is perfectly explained by the mtcars$am
#very intersting that this not obvious untill after encoding
#Make combined factor
fct<-cbind(mtcars$cyl,mtcars$am)
x11()
plot.data(mtcars,kind="PCA",meta.data=fct, size=5, alpha=.75)
x11()
plot.data(mtcars,kind="andrews-PCA",meta.data=fct, size=5, alpha=.75)
# judging by the clustering patterns,
# it looks like with this encoding cyl and am can be modeled (classified) relatively well
# there also seems to be a nother hidden covariate based on the pattern for 8 cylinder and 0 am (8|0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment