Skip to content

Instantly share code, notes, and snippets.

@therohk
Created October 21, 2019 12:42
Show Gist options
  • Save therohk/d844cc10c09fa2d203def3466d044189 to your computer and use it in GitHub Desktop.
Save therohk/d844cc10c09fa2d203def3466d044189 to your computer and use it in GitHub Desktop.
#' Plot the highest tf-idf terms per time slice as faceted bar charts
#'
#' Counts how often each term occurs within each time slice of the selected
#' date window, scores the (term, time slice) pairs with tf-idf where each
#' time slice acts as one document, and draws the top scoring terms of every
#' slice in its own facet.
#'
#' The function calls dplyr, lubridate, tidytext and ggplot2 functions
#' unqualified and does not load those packages itself; they must already be
#' attached by the caller.
#'
#' @param dfin Data frame with a term column `ungram` and a column
#'   `publish_date` whose first 8 characters are a date in `yyyymmdd` form.
#' @param start,stend Inclusive lower and upper bound of the date window;
#'   compared directly against the parsed `Date` values.
#' @param slice_unit Rounding unit handed to `floor_date()` to build the time
#'   slices (default `"year"`).
#' @param nmin Minimum number of occurrences a term needs within a time slice
#'   to be kept.
#' @param ntop Number of top tf-idf terms to show per time slice (`top_n()`
#'   keeps extra rows on ties).
#' @param ncol Number of facet columns in the plot.
#' @return A ggplot object with one horizontal bar chart per time slice.
plot_ungram_tfidf <- function(dfin, start, stend, slice_unit = "year",
                              nmin = 2, ntop = 10, ncol = 3) {
  # ---- publish-date-ungram-tfidf
  ungram_tf_idf <- dfin %>%
    subset(select = c(ungram, publish_date)) %>%
    # Dates that fail to parse become NA and are dropped by the filter below.
    mutate(Date = as.Date(substr(publish_date, 1, 8), format = "%Y%m%d")) %>%
    filter(Date >= start & Date <= stend) %>%
    mutate(time_slice = floor_date(Date, unit = slice_unit)) %>%
    mutate(time_slice = format(time_slice, "%Y-%m-%d")) %>%
    count(time_slice, ungram) %>%
    # Plain lower bound: the previous between(n, nmin, max(n)) was equivalent
    # but made max() warn when the date window matched no rows.
    filter(n >= nmin) %>%
    # Rare terms are removed before scoring, so term frequencies are relative
    # to the retained terms of each slice only.
    bind_tf_idf(ungram, time_slice, n) %>%
    arrange(desc(tf_idf))

  plot_data <- ungram_tf_idf %>%
    # Factor levels are global: reversed order of first appearance in the
    # descending tf-idf ranking, so after coord_flip() the best terms sit on
    # top. A term present in several slices is placed by its best score.
    mutate(ungram = factor(ungram, levels = rev(unique(ungram)))) %>%
    arrange(time_slice, tf_idf) %>%
    group_by(time_slice) %>%
    # Name the ranking column explicitly instead of relying on tf_idf being
    # the last column (also silences the "Selecting by tf_idf" message).
    top_n(n = ntop, wt = tf_idf) %>%
    ungroup()

  plot_title <- paste(
    "Ntop", ntop, "ungram tf-idf scores",
    start, "to", stend, "slby", slice_unit
  )

  plot_data %>%
    ggplot(aes(x = ungram, y = tf_idf, fill = time_slice)) +
    geom_col(show.legend = FALSE) +
    labs(x = "un.grm", y = "tf-idf") +
    facet_wrap(~time_slice, ncol = ncol, scales = "free") +
    theme(text = element_text(size = 10)) +
    ggtitle(plot_title) +
    coord_flip()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment