Skip to content

Instantly share code, notes, and snippets.

@jrosen48
Created April 29, 2021 03:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jrosen48/95f5d6edd880fb1e68de13991c7791ba to your computer and use it in GitHub Desktop.
Save jrosen48/95f5d6edd880fb1e68de13991c7791ba to your computer and use it in GitHub Desktop.
prep-otter-transcript.R
library(tidyverse)
library(googlesheets4)
# Load the example Google Sheet (presumably a raw Otter transcript pasted into
# a single column -- confirm against the sheet itself).
# col_names = FALSE makes read_sheet() treat the first sheet row as data
# instead of using it as column headers.
d <- read_sheet("https://docs.google.com/spreadsheets/d/1kz2LlLgXkN_HaBEAiFETl59b09u9AwMIgrwQx6DCv8A/edit#gid=0", col_names = FALSE)
#' Reshape a single-column transcript into time / speaker / text columns
#'
#' The first column of `d` is expected to alternate (after dropping `NA`
#' rows) between a header line of the form "<speaker><timestamp>" and the
#' line of text spoken. The final row of `d` is always discarded
#' (presumably a trailing footer line in the Otter export -- confirm against
#' your data).
#'
#' @param d A data frame whose first column holds the transcript lines.
#' @param length_less_than_100_min If `TRUE` (the default) the timestamp is
#'   taken to be the last 5 characters of each header line; otherwise the
#'   last 6 characters.
#' @return A data frame with character columns `time`, `speaker` and `text`,
#'   all whitespace-trimmed, one row per header line.
prep_otter_transcript <- function(d, length_less_than_100_min = TRUE) {
  # Number of trailing characters of a header line occupied by the timestamp.
  time_width <- if (length_less_than_100_min) 5L else 6L

  d %>%
    rename(x1 = 1) %>%
    # Drop the last row of the input.
    slice(-n()) %>%
    filter(!is.na(x1)) %>%
    # Pair every line with the line that follows it ...
    mutate(text = lead(x1)) %>%
    # ... then keep only the odd rows (the header lines). Using row_number()
    # instead of a fixed-length rep() also works when the row count is odd.
    filter(row_number() %% 2L == 1L) %>%
    mutate(
      time = str_sub(x1, start = -time_width),
      # Stop one character *before* the timestamp so that its first
      # character does not leak into the speaker name.
      speaker = str_sub(x1, end = -(time_width + 1L))
    ) %>%
    select(time, speaker, text) %>%
    mutate(across(everything(), str_trim))
}
# The timestamp-width flag must be supplied explicitly. TRUE selects the
# 5-character timestamp branch (assumes the example recording is shorter than
# 100 minutes -- pass FALSE for longer sessions).
prep_otter_transcript(d, length_less_than_100_min = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment