Skip to content

Instantly share code, notes, and snippets.

@lucdangelis
Last active June 10, 2019 16:14
Show Gist options
  • Save lucdangelis/7331c5bb35f860cbcd4495db224de20f to your computer and use it in GitHub Desktop.
Save lucdangelis/7331c5bb35f860cbcd4495db224de20f to your computer and use it in GitHub Desktop.
library(tidyr)
library(reshape2)
library(ggplot2)
library(corrplot)
# Split df1 into categorical (factor) and numeric (non-factor) columns.
# vapply() always yields a named logical vector, whereas sapply() would
# return an empty list for a data frame with no columns.
is.fact <- vapply(df1, is.factor, logical(1))

# Long key/value table of the factor columns for the faceted bar charts.
# - drop = FALSE keeps a data frame even when only one factor column exists.
# - Values are converted to character first so gather() does not have to
#   reconcile factors with different level sets (which triggers a warning).
# - No helper `count` column is added: gather() stacks every column, so it
#   would appear as an extra "count" variable, and geom_bar() counts rows
#   by itself.
df1_cat <- df1[, is.fact, drop = FALSE]
df1_cat[] <- lapply(df1_cat, as.character)
df1_cat <- gather(df1_cat)

# Non-factor columns, kept wide for cor() and melted to long form
# (columns `variable` / `value`) for the density plots.
# NOTE(review): assumes every non-factor column is numeric -- confirm.
df1_num <- df1[, !is.fact, drop = FALSE]
df1_num_long <- melt(df1_num)
# Density curve for each numeric variable: one panel per variable, with
# independent axis scales per panel.
num_var <- ggplot(df1_num_long, mapping = aes(x = value))
num_var <- num_var +
  stat_density() +
  facet_wrap(~variable, scales = "free") +
  theme_bw()
num_var
# Bar chart of level counts for each categorical variable: one panel per
# variable, with independent axis scales per panel.
cat_var <- ggplot(data = df1_cat, mapping = aes(x = value)) +
  geom_bar() +
  facet_wrap(~key, scales = "free")
# Rotate the x-axis labels to vertical so long level names do not overlap.
cat_var <- cat_var +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
cat_var
# Correlation matrix of the numeric variables, upper triangle only, with
# coefficients shown as percentages.
# Fix: the argument was misspelled `mehtod`, so it was never matched to
# corrplot's `method` parameter and the requested "number" style was not
# applied.
corrplot(
  cor(df1_num),
  method = "number",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.6,
  addCoef.col = "black",
  addCoefasPercent = TRUE,
  number.cex = 0.5
)
# One-hot encode the categorical (factor) columns and append the resulting
# indicator columns to the numeric columns.
# - onehot() is namespace-qualified because the script never attaches the
#   onehot package; loading the namespace also makes its predict() method
#   available.
# - drop = FALSE keeps a data frame when there is exactly one factor column.
encoder <- onehot::onehot(df1[, is.fact, drop = FALSE])
cat_encoded <- predict(encoder, df1[, is.fact, drop = FALSE])
df_encoded <- cbind(df1_num, cat_encoded)
# Correlation matrix of all variables (numeric plus one-hot encoded
# categoricals), upper triangle only, with coefficients shown as percentages.
# Fix: the argument was misspelled `mehtod`, so it was never matched to
# corrplot's `method` parameter and the requested "number" style was not
# applied.
corrplot(
  cor(df_encoded),
  method = "number",
  type = "upper",
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.6,
  addCoef.col = "black",
  addCoefasPercent = TRUE,
  number.cex = 0.5
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment