Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
wizzard of oz sentiment
if (!require("pacman")) install.packages("pacman")
pacman::p_load(robotstxt, dplyr, textshape, textclean, textreadr, xml2, rvest, stringi, sentimentr, magrittr, numform)
path <- 'http://www.wendyswizardofoz.com/printablescript.htm'
paths_allowed(path)
abbreviation_map <- list(
abb = c('o.s.', 'b.g.'),
rep = c('off screen', 'back ground')
)
dat <- path %>%
xml2::read_html() %>%
rvest::html_nodes('pre') %>%
rvest::html_text() %>%
stringi::stri_split_regex('(\n)+') %>%
`[[`(1) %>%
textshape::split_match_regex('^([A-Z]{3,}\\s+){0,1}([A-Z]{2,}\\b)\\s*-{1,2}', include = TRUE) %>%
lapply(function(x){
textshape::split_match_regex(x, "^(\t){4}([A-Z]{3,}\\s+){0,2}([A-Z]{2,}\\b)", include = TRUE)
}) %>%
unlist(recursive = FALSE) %>%
textshape::combine() %>%
stringi::stri_replace_all_regex('\t\t\t\t\t\t\t\tFADE OUT:.+$', '') %>%
stringi::stri_replace_all_regex('(^[^\t].+$)', '{{$1}}') %>%
stringi::stri_replace_all_regex('LAP DISSOLVE TO:', '') %>%
textshape::split_match_regex_to_transcript("^(\t){4}([A-Z']{3,}[ ]+){0,2}([A-Z']{2,}\\b)", skip = 15) %>%
dplyr::mutate_each(funs(trimws)) %>%
dplyr::mutate_at(vars(Dialogue), textclean::mgsub, pattern = abbreviation_map$abb, replacement = abbreviation_map$rep) %>%
tibble::as_tibble() %>%
mutate(
Dialogue = gsub('-{2,}', '... ', Dialogue),
Directions = sapply(stringi::stri_extract_all_regex(Dialogue, '\\{{2}.+?\\}{2}'), function(x) {
paste(gsub('(\\{{2})|(\\}{2})', '', unlist(x)), collapse = ' ')
}) %>%
ifelse(. == "NA", NA, .),
Dialogue = gsub('\\{{2}.+?\\}{2}', ' ', Dialogue)
) %>%
textshape::split_speaker(sep = 'AND') %>%
dplyr::rename(original_element_id = element_id) %>%
dplyr::select(Person, Dialogue) %>%
textshape::split_sentence()
dat %>%
textreadr::peek(n = 20, 20)
dat %>%
select(Person, element_id) %>%
distinct() %>%
count(Person) %>%
arrange(desc(n))
dat %>%
group_by(Person) %>%
summarize(n_words = sum(stringi::stri_count_words(Dialogue), na.rm = TRUE)) %>%
arrange(desc(n_words))
hash_sentiment_jockers2 <- update_polarity_table(
lexicon::hash_sentiment_jockers,
x = data.frame(x = c("witch is dead"), y = c(2), stringsAsFactors = FALSE)
)
senti_dat <- dat %>%
dplyr::rename(original_element_id = element_id) %>%
sentimentr::get_sentences() %>%
sentimentr::sentiment_by(polarity_dt = hash_sentiment_jockers2)
plot(senti_dat, low_pass_size = 10)
plot(senti_dat, scale_range = TRUE, low_pass_size = 10)
mvg <- function(y, win = 4){
len <- length(y)
out <- sapply(seq_along(y), function(i){
i <- (i-win):(i+win)
mean(y[i[i > 0 & i < len]])
})
class(out) <- c('mvg', class(out))
attributes(out)[['orig']] <- y
out
}
mvg2 <- function(x, s1 =4, s2=2, ...){
mvg(mvg(x, s1), s2)
}
event_dat <- dat %>%
mutate(id = 1:n()) %>%
textclean::keep_row(
'Dialogue',
c(
'Are you a good witch, or a bad witch',
'That way is a very nice way.',
'I\'m melting!',
'Go away and come back tomorrow!',
'Why, anybody can have a brain.',
'Oh, come back here!',
'And think to yourself',
'Why, it\'s a man!',
'What do you think you\'re doing?',
'Put \'em up!',
'and there they\'ll stay!',
'Who killed my sister?',
'soothing to the smell!',
'Can\'t you read?',
'We want to see the Wizard right away',
'We\'ll soon find the',
'What an unexpected pleasure',
'Do you suppose there is such a place',
'I want to see you and your wife right',
'her face is careworn'
)
) %>%
mutate(
event = c(
'Somewhere Over the Rainbow',
'Miss Gulch Wants Toto',
'Professor Looks into Crystal Ball',
'Meets Glinda',
'Meets Wicked Witch',
'Dorthy Aquires Slippers',
'Meets Scarecrow',
'Angry Apple Trees',
'Meets Tin Man',
'Meets Lion',
'Poppy Field',
'Arrival at Emerald City',
'Oz Refuses to See Them',
'Meets Oz',
'Witch Takes Toto',
'Wicked Witch Dies',
'Oz Breaks Promise',
'Oz Keeps Promise',
'Toto Runs Away',
'Clicks Red Slippers'
) %>% numform::f_wrap(12)
) %>%
left_join(
senti_dat %>%
mutate(
id = 1:n(),
rollave = mvg2(ave_sentiment, 8, 10),
smoothed = mvg2(rollave, 20, 60)
) %>%
select(id, rollave),
by = 'id'
)
senti_dat %>%
mutate(
rollave = mvg2(ave_sentiment, 8, 10),
smoothed = mvg2(rollave, 20, 60)
) %>%
ggplot() +
geom_hline(yintercept = 0, color = 'grey50', linetype = 'dashed', size = .65) +
geom_line(color = 'grey60', aes( x= element_id, y = rollave), size = .8, linetype = 'dotted') +
geom_line(color = 'purple', aes( x= element_id, y = smoothed), size = 1) +
geom_point(data = event_dat, aes(x = id, y = rollave), color = 'blue') +
ggrepel::geom_text_repel(data = event_dat, aes(x = id, y = rollave, label = event),
size = 3, color = 'grey50', alpha = .6) +
theme_minimal() +
scale_y_continuous(labels = numform::ff_num(digits = 1)) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
axis.text.y = element_text(color = 'gray65', size = 11),
axis.title.y = element_text(color = 'gray65', size = 11, angle = 0),
axis.text.x = element_blank(),
axis.ticks = element_blank()
) +
labs(
x = NULL, y = 'Sentiment',
title = 'Wizzard of Oz Sentiment',
subtitle = numform::f_wrap(c(
"The grey dotted line show less smoothed sentiment scores.",
'The purple line shows a more smoothed sentiment across the story.'
), 140, collapse = TRUE)
) +
coord_cartesian(xlim = c(0, 2580))
@trinker

This comment has been minimized.

Copy link
Owner Author

trinker commented Oct 31, 2017

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.