Skip to content

Instantly share code, notes, and snippets.

@trinker
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trinker/90a60e9ee32a7e825cf5 to your computer and use it in GitHub Desktop.
Save trinker/90a60e9ee32a7e825cf5 to your computer and use it in GitHub Desktop.
Analysis of Dason's analysis of Lil Wayne
# Dependency bootstrap ----
# Record whether the two GitHub-hosted packages are already installed.
# requireNamespace() only tests availability; unlike require() it does not
# attach the package, so the check itself has no effect on the search path.
pstat <- requireNamespace("pacman", quietly = TRUE)
bostat <- requireNamespace("BreakoutDetection", quietly = TRUE)

# Install whichever of the two is missing.
if (!pstat) {
  devtools::install_github("trinker/pacman")
}
if (!bostat) {
  devtools::install_github("twitter/BreakoutDetection")
}

# Call p_load() through its namespace: when pacman had to be installed just
# above it has not been attached, so a bare `p_load()` would not be found.
# ggplot2 is listed explicitly because geom_rect(), theme(), guides(), etc.
# are called unqualified further down in this script.
pacman::p_load(BreakoutDetection, dplyr, XML, qdap, ggplot2)
# Scrape, score, and segment ----
# Parse the blog post, take the text of every <p> node, drop the last one,
# and hand the remaining paragraphs to read.transcript().
doc <- htmlParse("http://dasonk.com/2014/07/18/Lil-Jon-Analysis/")
text <- getNodeSet(doc, "//p") %>%
  sapply(xmlValue) %>%
  head(-1) %>%
  read.transcript(text = .)

# Split the first element of the transcript into sentences and compute the
# polarity of each sentence.
sents <- sent_detect(text[[1]])
dpol <- polarity(sents)
plot(dpol)

# Cumulative polarity; its first element is the series passed to breakout().
cumpolarity <- cumulative(dpol)
res2 <- breakout(
  cumpolarity[[1]],
  min.size = 4,
  method = "multi",
  beta = 0.3,
  degree = 0,
  plot = TRUE
)
res2[["plot"]] ## ggplot

# Open a fresh graphics device before printing the cumulative object
# (presumably its print method draws a plot -- confirm against qdap).
dev.new()
cumpolarity
# Breakout periods ----
# Build `rects2` (one row per period between detected breakout locations) and
# `var` (the period label of every observation in the cumulative series).

# The cumulative series is used repeatedly below; extract it once.
cum_mean <- cumpolarity[[1]]

cumpolarity_2 <- data.frame(
  cum_mean = cum_mean,
  Time = seq_along(cum_mean)
)
# Not used again in this script; kept so the object stays available.
polcount <- counts(dpol)[["polarity"]]

# Period boundaries: each period ends at a breakout location (the last one at
# the end of the series) and starts where the previous period ended.
ends2 <- c(res2[["loc"]], length(cum_mean))
starts2 <- c(1, head(ends2, -1))

# seq_len() yields an empty index when no breakout was found, so the label
# vector has exactly one entry ("Unstable") per row. The previous `1:0` form
# produced an extra "A" and made data.frame() fail on mismatched lengths.
rects2 <- data.frame(
  xstart = starts2,
  xend = ends2,
  period = c("Unstable", LETTERS[seq_len(length(ends2) - 1)])
)

# Label every observation with its period. Adjacent periods share their
# boundary index; because rows are processed in order, the later period wins.
var <- rep(NA_character_, length(cum_mean))
for (i in seq_len(nrow(rects2))) {
  idx <- rects2[["xstart"]][i]:rects2[["xend"]][i]
  var[idx] <- as.character(rects2[["period"]][i])
}

# Mean of the series from its start up to the end of each period.
rects2[["cumave"]] <- vapply(
  rects2[["xend"]],
  function(i) mean(cum_mean[seq_len(i)]),
  numeric(1)
)

# Mean of the series within each period, aligned to the rows of `rects2`
# by period label (NA for a label that has no observations).
period_means <- vapply(split(cum_mean, var), mean, numeric(1))
rects2[["ave"]] <- unname(period_means[as.character(rects2[["period"]])])
# Annotated cumulative-polarity plot ----
# Start from the ggplot returned by plot(cumpolarity, plot = FALSE) and layer
# on: a shaded rectangle and a text label per period, a red vertical line at
# each breakout location, and a dashed green segment at each period's mean.
dplot <- plot(cumpolarity, plot = FALSE) +
  geom_rect(
    data = rects2,
    aes(xmin = xstart, xmax = xend, ymin = -Inf, ymax = Inf, fill = period),
    alpha = 0.17
  ) +
  geom_text(
    data = rects2,
    aes(x = (xstart + xend) / 2, y = -.04, label = period),
    size = 3
  ) +
  # "none" is the supported way to drop a legend; `FALSE` is deprecated.
  guides(fill = "none") +
  geom_vline(xintercept = c(res2[["loc"]]), alpha = .2, color = "red") +
  theme(
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_blank()
  ) +
  # xlim(0, length(cumpolarity[[1]]) + 5) +
  geom_segment(
    data = rects2,
    linetype = "longdash",
    color = "darkgreen",
    alpha = .4,
    size = 1.25,
    aes(x = xstart, y = ave, xend = xend, yend = ave)
  ) +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, length(cumpolarity[[1]]) + 1)
  )

# Render once. The assignment was previously also wrapped in parentheses,
# which printed the same plot a second time.
dplot
# Sentences by period ----
# Group the sentences by period label, ordered by first appearance in `var`.
out <- split(x = sents, f = var)[unique(var)]

# Relabel each group as "<period>: average polarity = <mean>", with the mean
# taken from rects2[["ave"]] and rounded to 4 decimals.
dbot <- out
names(dbot) <- paste0(
  names(out),
  ": average polarity = ",
  round(rects2[["ave"]], 4)
)
dbot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment