Skip to content

Instantly share code, notes, and snippets.

@kalebr
Last active December 23, 2015 03:49
Show Gist options
  • Save kalebr/6576200 to your computer and use it in GitHub Desktop.
Save kalebr/6576200 to your computer and use it in GitHub Desktop.
hierarchical clustering of pokemon by attributes
library(RCurl)
library(reshape2)
library(plyr)
library(ggplot2)
# Download the three pokedex CSV tables the analysis needs: species, per-pokemon
# base stats, and the localized stat names.
pokedex_csv_url <- "https://raw.github.com/veekun/pokedex/master/pokedex/data/csv/"

# Fetch one CSV table by file name and parse it into a data frame.
# - TLS certificate verification stays enabled so the downloaded data cannot be
#   silently tampered with in transit.
# - Redirects are followed; getURL() does not do so by default, and a redirect
#   response would otherwise come back as an empty body.
#   NOTE(review): the raw.github.com host appears to redirect to another host
#   nowadays -- confirm.
read_pokedex_csv <- function(file_name) {
  csv_text <- getURL(
    paste0(pokedex_csv_url, file_name),
    ssl.verifypeer = TRUE,
    followlocation = TRUE
  )
  read.csv(text = csv_text)
}

pokemon <- read_pokedex_csv("pokemon_species.csv")
pokemon_stats <- read_pokedex_csv("pokemon_stats.csv")
stat_names <- read_pokedex_csv("stat_names.csv")
# Within one evolution chain, keep only the rows whose evolves_from_species_id
# equals the chain's largest non-missing value (presumably the final evolution
# stage -- confirm). A chain in which every evolves_from_species_id is NA
# contributes no rows at all; the explicit guard reproduces that outcome
# without calling max() on an empty set, which would warn once per such chain.
keep_last_stage <- function(chain) {
  parent_id <- chain$evolves_from_species_id
  if (all(is.na(parent_id))) {
    return(chain[0, , drop = FALSE])
  }
  is_last <- !is.na(parent_id) & parent_id == max(parent_id, na.rm = TRUE)
  chain[is_last, , drop = FALSE]
}
pokemon <- ddply(pokemon, .(evolution_chain_id), keep_last_stage)

# Keep the stat names for a single language only.
# NOTE(review): local_language_id 9 is presumably English -- confirm.
stat_names <- stat_names[stat_names$local_language_id == 9, c("stat_id", "name")]

# Attach the readable stat name to every (pokemon, stat) row; the merge uses
# the columns the two tables have in common.
pokemon_stats <- merge(pokemon_stats, stat_names)
pokemon_stats <- pokemon_stats[, c("pokemon_id", "base_stat", "name")]
colnames(pokemon_stats) <- c("pokemon_id", "value", "name")

# Long -> wide: one row per pokemon_id, one column per stat name.
pokemon_stats <- dcast(pokemon_stats, pokemon_id ~ name, mean, value.var = "value")

# Rename the key column to "id" so it lines up with the species table's key.
colnames(pokemon_stats)[[1]] <- "id"
pokemon <- merge(pokemon, pokemon_stats)

# Only the identifier and the six base stats are needed from here on.
pokemon <- pokemon[, c("identifier", "Attack", "Defense", "HP", "Special Attack", "Special Defense", "Speed")]
# Standardise every stat column (everything except the identifier) so each one
# contributes on the same scale to the distances used for clustering.
pokemon.scaled <- scale(pokemon[, -1])

# Hierarchical clustering on the pairwise distances, drawn as a dendrogram
# labelled with the pokemon identifiers.
hc <- hclust(dist(pokemon.scaled))
plot(hc, labels = pokemon$identifier, hang = -1)

# Cut the tree into six clusters and store each pokemon's cluster number.
pokemon$groups <- cutree(hc, k = 6)

# Per-cluster mean of each stat, reshaped to long form for plotting.
# Dropping the first column leaves the six stats plus `groups`.
df <- melt(
  ddply(pokemon[, -1], .(groups), numcolwise(mean)),
  id.vars = "groups"
)

# One horizontal bar chart per cluster, one bar per stat.
ggplot(df, aes(variable, value, fill = variable)) +
  geom_bar(stat = "identity") +
  facet_grid(groups ~ .) +
  coord_flip() +
  theme(legend.position = "none")

# Human-readable names for clusters 1 to 6, in that order.
# NOTE(review): the names are tied to the cluster numbering of one particular
# run; re-check them whenever the input data changes.
pokemon$class <- factor(
  pokemon$groups,
  labels = c(
    "Middle Road", "Speed Demons", "Bench",
    "Hard Knocks", "Meat Shield", "Bulky Attackers"
  )
)

# List the members of every named cluster.
pokemon$identifier <- as.character(pokemon$identifier)
split(pokemon$identifier, pokemon$class)
# Part 2: how the average within-cluster variance changes with the number of clusters
# Average within-cluster variance for a cut of the tree into `n` clusters.
#
# For every numeric column of `data` (ignoring any existing `groups` column)
# the variance is computed inside each cluster and averaged over the clusters;
# the result is the mean of those per-column averages. Clusters with a single
# member have an undefined (NA) variance and are skipped, so the result is NaN
# when every cluster is a singleton.
#
# n:    number of clusters to cut `tree` into.
# data: data frame with one row per clustered observation, in the same order
#       as the observations behind `tree`; defaults to the global `pokemon`.
# tree: hclust result to cut; defaults to the global `hc`.
#
# Returns a single numeric value.
v <- function(n, data = pokemon, tree = hc) {
  groups <- cutree(tree, k = n)

  # Select the stat columns by type rather than by position, and make sure a
  # previously stored cluster assignment is not treated as a stat.
  stats <- data[vapply(data, is.numeric, logical(1))]
  stats$groups <- NULL

  per_stat <- vapply(
    stats,
    function(column) mean(tapply(column, groups, var), na.rm = TRUE),
    numeric(1)
  )
  mean(per_stat, na.rm = TRUE)
}
# Evaluate v() for every possible number of clusters, from a single cluster up
# to one cluster per pokemon, and plot the resulting curve.
# seq_len() stays well-defined for an empty table, and vapply() guarantees a
# numeric vector with exactly one value per cluster count.
cluster_counts <- seq_len(nrow(pokemon))
dv <- data.frame(n = cluster_counts, var = vapply(cluster_counts, v, numeric(1)))
ggplot(dv, aes(n, var)) + geom_line()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment