Skip to content

Instantly share code, notes, and snippets.

@jwinternheimer
Created August 4, 2015 19:01
Show Gist options
  • Save jwinternheimer/a1a6320f08f7287c059d to your computer and use it in GitHub Desktop.
Save jwinternheimer/a1a6320f08f7287c059d to your computer and use it in GitHub Desktop.
library(data.table); library(dplyr); library(tidyr); library(ggplot2);library(NbClust)
## Read Data
## Load the business-user export; the first row of the CSV holds the headers.
data <- read.table("~/Downloads/business_users.csv", sep = ",", header = TRUE)
## The rename below is positional, so fail loudly if the export does not have
## exactly the seven expected columns (extra columns would otherwise end up
## with NA names).
stopifnot(ncol(data) == 7L)
names(data) <- c(
  "user_id", "team_members", "profiles", "months_paying",
  "plan_days", "updates", "updates_per_day_per_profile"
)
## Prep Data
## Replace every missing value in the table with 0.
data[is.na(data)] <- 0
## Separate Variables We're Interested In
## Drop the identifier and the columns left out of the clustering; what
## remains is team_members, profiles and updates_per_day_per_profile.
features <- select(data, -user_id, -plan_days, -updates, -months_paying)
## Find how many clusters to use with wssplot

#' Plot within-groups sum of squares against the number of k-means clusters
#'
#' Fits `kmeans()` for every cluster count from 2 to `nc` and plots the
#' within-groups sum of squares for each count, so the point where adding
#' clusters stops paying off can be read from the bend in the curve.
#'
#' The value for one cluster is the total sum of squares, computed directly
#' from the column variances. `set.seed(seed)` is called before every fit, so
#' the global RNG state is modified as a side effect.
#'
#' @param data Numeric data frame or matrix, one observation per row.
#' @param nc Largest number of clusters to try; must be at least 1.
#' @param seed Seed used before each `kmeans()` fit.
#' @return Invisibly, a numeric vector of length `nc` whose i-th element is
#'   the within-groups sum of squares for i clusters.
wssplot <- function(data, nc = 15, seed = 1234) {
  stopifnot(is.numeric(nc), length(nc) == 1L, !is.na(nc), nc >= 1)

  # Preallocate instead of growing the vector inside the loop.
  wss <- numeric(nc)

  # One cluster: total sum of squares around the column means.
  wss[1] <- (nrow(data) - 1) * sum(apply(data, 2, var))

  # seq_len(nc)[-1] is empty when nc == 1, whereas 2:nc would count down
  # (2, 1), overwrite wss[1] and leave plot() with mismatched lengths.
  for (i in seq_len(nc)[-1]) {
    set.seed(seed)
    wss[i] <- sum(kmeans(data, centers = i)$withinss)
  }

  plot(
    seq_len(nc), wss,
    type = "b",
    xlab = "Number of Clusters",
    ylab = "Within groups sum of squares"
  )
  invisible(wss)
}
## Draw the within-groups sum of squares curve for the selected features
wssplot(features)
## NbClust
## Evaluate k-means partitions with 2 to 15 clusters against NbClust's
## collection of validity indices.
nc <- NbClust(features, min.nc = 2, max.nc = 15, method = "kmeans")
## Tally the first row of Best.n (per the NbClust documentation, the number of
## clusters each index proposes) and chart how many indices chose each count.
table(nc$Best.n[1, ])
barplot(
  table(nc$Best.n[1, ]),
  xlab = "Number of Clusters",
  ylab = "Number of Criteria",
  main = "Number of Clusters Chosen"
)
## K-means Clustering
## Fix the seed so the random starting centres, and with them the cluster
## assignments, are reproducible; fit three clusters on the selected features.
set.seed(1234)
results <- kmeans(x = features, centers = 3)
results
## Add cluster to original dataset
## Stored as a factor so the cluster label is treated as a category.
data[["cluster"]] <- factor(results[["cluster"]])
## Plot number of profiles and team members for each cluster
ggplot(data, aes(x = profiles, y = team_members, color = cluster)) +
  # The original call passed an empty `size=` argument (a value had been
  # deleted); it is dropped here so the default point size is used.
  geom_point() +
  # Restrict the x axis to 0-50 profiles. NOTE(review): ggplot2 drops rows
  # outside scale limits (with a warning) -- confirm that is intended.
  scale_x_continuous(limits = c(0, 50)) +
  labs(x = "Number of Profiles", y = "Number of Team Members") +
  theme_minimal()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment