Created
March 3, 2017 01:08
-
-
Save kshaffer/5e036aefa790f5d30eba6b9b01014fe1 to your computer and use it in GitHub Desktop.
Count the frequency of letters in some famous books
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr)
library(tibble)
library(gutenbergr)

# Exploration: tally the works in the Project Gutenberg catalogue per author
# and list the authors with the most works first.
gutenberg_works() %>%
  count(author, name = "count", sort = TRUE)

# Exploration: list Mark Twain's works in Project Gutenberg.
gutenberg_works(author == "Twain, Mark")
# Fetch four Mark Twain books from Project Gutenberg by ID.
# The text of each book is read from the `text` column of the result.
tom  <- gutenberg_download(74)   # The Adventures of Tom Sawyer
huck <- gutenberg_download(76)   # Adventures of Huckleberry Finn
yank <- gutenberg_download(86)   # A Connecticut Yankee in King Arthur's Court
miss <- gutenberg_download(245)  # Life on the Mississippi
# Join every line of all four books into a single lower-case string, then
# split that string into individual characters. strsplit() returns a list, so
# `text` is a list of length one holding the character vector.
# The split is done per character rather than per byte (useBytes = TRUE would
# split byte-by-byte), so multi-byte UTF-8 characters such as accented letters
# stay intact instead of being broken into separate bytes.
text <- c(huck$text, tom$text, yank$text, miss$text) %>%
  paste(collapse = " ") %>%
  tolower() %>%
  strsplit(split = "")
# Count how often each character occurs and list the counts in descending
# order. Only the space character is excluded, so punctuation and digits are
# tallied alongside the letters.
# NOTE: the name `letters` masks the built-in constant base::letters for the
# rest of the session.
letters <- tibble(char = unlist(text)) %>%
  filter(char != " ") %>%
  count(char, name = "count", sort = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment