# Created March 24, 2015 15:28
# Aggressive characters in Harry Potter
# Load packages
# Helper functions
# Nice clean, stripped-down theme
#
# Starts from theme_bw() and removes the panel border, axis lines, grid lines
# and tick marks, and gives panels and facet strips a plain white background.
#
# Args:
#   base_size:   base font size, passed through to theme_bw().
#   base_family: base font family, passed through to theme_bw(). Titles, axis
#                titles and strip text are set in "Source Sans Pro Semibold".
#
# Returns:
#   A theme object that can be added to a ggplot with `+`.
#
# NOTE(review): both Source Sans Pro faces have to be available to the
# graphics device used for rendering -- confirm on the target machine.
theme_clean <- function(base_size=12, base_family="Source Sans Pro Light") {
  ret <- theme_bw(base_size, base_family) +
    theme(panel.background = element_rect(fill="#ffffff", colour=NA),
          # Nudge the axis titles away from the axes
          axis.title.x=element_text(vjust=-0.2),
          axis.title.y=element_text(vjust=1.5),
          title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
          # Strip every non-data line from the panel
          panel.border = element_blank(), axis.line=element_blank(),
          panel.grid=element_blank(), axis.ticks=element_blank(),
          axis.title=element_text(size=rel(0.8), family="Source Sans Pro Semibold"),
          strip.text=element_text(size=rel(1), family="Source Sans Pro Semibold"),
          strip.background=element_rect(fill="#ffffff", colour=NA),
          # NOTE(review): newer ggplot2 releases call this panel.spacing.y and
          # warn about the old name -- confirm against the installed version.
          panel.margin.y=unit(1.5, "lines"))

  # Hand the assembled theme back to the caller
  ret
}
# Append spaces to labels to fake right margin/padding
#
# Args:
#   x: vector of labels; anything paste0() can convert to text.
#
# Returns:
#   A character vector with a trailing space added to each element of x.
add.spaces <- function(x) {
  # paste0() always returns a character vector, so no extra coercion is needed
  paste0(x, " ")
}
# Load data
# URL to the fantastic data
# NOTE(review): the URL is blank in this copy of the script; set it to the
# location of the CSV file before running.
data.url <- ""

# Book names, used as display labels for the per-book columns.
# NOTE(review): assumes the seven book columns appear in the data in this same
# order -- confirm against the CSV header.
nice.books <- c("The Sorcerer's Stone", "The Chamber of Secrets",
                "The Prisoner of Azkaban", "The Goblet of Fire",
                "The Order of the Phoenix", "The Half-Blood Prince",
                "The Deathly Hallows")

# Fail early with a readable message instead of an opaque download error
if (!nzchar(data.url)) {
  stop("data.url is empty; set it to the URL of the CSV data", call. = FALSE)
}

# Load and rearrange the data
# read.csv(text = ...) parses the downloaded string directly, so no text
# connection is left open afterwards.
hp <- read.csv(text = getURL(data.url), stringsAsFactors = FALSE) %>%
  select(1:13) %>%  # Only get the first 13 columns
  select(-c(g_e_m_n, evil, creature, tot)) %>%  # Get rid of other columns
  gather(book, aggressions, -c(Name, abb)) %>%
  # gather() may return `book` as character or as factor depending on the
  # tidyr version; unique(as.character(.)) yields the column order either way.
  mutate(book = factor(book, levels = unique(as.character(book)),
                       labels = nice.books, ordered = TRUE),
         # Same books in reverse level order, for the flipped bar chart
         book.rev = factor(book, levels = rev(levels(book)), ordered = TRUE))
# Plot data
# Find most aggressive characters
# Total the aggressions per character across all books, sort from highest to
# lowest, and keep the first five rows.
most.aggressive <- hp %>%
  group_by(Name) %>%
  summarize(Aggressions = sum(aggressions)) %>%
  arrange(desc(Aggressions)) %>%
  head(5)
# Rearrange data for plotting
# NOTE(review): the assignment target is missing from this copy of the script;
# `plot.data` is the name chosen in review and is only used by the plot below.
plot.data <- hp %>%
  # Keep only the characters that made the top-five list
  filter(Name %in% most.aggressive$Name) %>%
  mutate(Name = factor(Name, levels=most.aggressive$Name, ordered=TRUE),
         # Reversed level order, with a trailing space on each label to pad
         # the legend text
         Name.rev = factor(Name, levels=rev(levels(Name)),
                           labels=add.spaces(rev(levels(Name))), ordered=TRUE)) %>%
  group_by(Name.rev, book.rev) %>%
  summarize(aggr = sum(aggressions)) %>%
  ungroup()

# Plot top 5
# Dodged bars per book, flipped to horizontal. The reversed factors feed the
# flipped axis, and the legend is reversed again via guide_legend().
nice.plot <- ggplot(plot.data, aes(x=book.rev, y=aggr, fill=Name.rev)) +
  geom_bar(stat="identity", position="dodge") +
  # Thin white lines drawn over the bars at every 10 units, in place of the
  # grid lines that theme_clean() removes
  geom_hline(yintercept=seq(10, 50, by=10), colour="#ffffff", size=0.25) +
  coord_flip() +
  labs(x=NULL, y="Instances of aggression",
       title="Most aggressive characters in the Harry Potter series") +
  scale_fill_manual(values=c("#915944", "#9BAB9C", "#692521", "#CAAA17", "#B51616"),
                    guide=guide_legend(reverse=TRUE), name="") +
  theme_clean() + theme(legend.key.size = unit(0.5, "lines"))
# Save plot
# Write the finished figure twice at 7 x 5 inches: once as PNG and once as PDF
# through the cairo_pdf device.
ggsave(filename="top_5.png", plot=nice.plot, units="in", width=7, height=5)
ggsave(filename="top_5.pdf", plot=nice.plot, device=cairo_pdf, units="in", width=7, height=5)
# How did you get the CSV data file? Was it already available, or did you create it? Anyway, awesome work!

