Skip to content

Instantly share code, notes, and snippets.

@arvi1000
Last active November 19, 2019 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arvi1000/e4c2c06ad096d925bacc54981d5987ed to your computer and use it in GitHub Desktop.
Save arvi1000/e4c2c06ad096d925bacc54981d5987ed to your computer and use it in GitHub Desktop.
script to fix an out-of-order .srt caption file
# Work from the project folder: the relative input/output paths used further
# down are resolved against this directory.
setwd("~/Documents/personal/r_stuff/bcc/srt_sort/")
# Re-order the caption chunks of an .srt file by time stamp and renumber them.
#
# Args:
#   srt_file: character vector holding the lines of the file (e.g. the result
#     of readLines()). Caption chunks are separated by blank lines ("").
#
# Returns:
#   Character vector of lines: chunks sorted by their time-stamp line,
#   renumbered 1..n, separated by single blank lines, with no leading or
#   trailing blank line. Empty input (or input made only of blank lines)
#   yields character(0).
fix_srt <- function(srt_file) {
  srt_file <- as.character(srt_file)

  # The running count of blank lines seen so far gives every line a chunk id.
  # Dropping the blank lines themselves *before* splitting means that leading,
  # trailing or repeated blank lines never produce an empty chunk (which would
  # otherwise be sorted last and turned into a stray index number).
  is_blank <- srt_file == ""
  chunk_id <- cumsum(is_blank)[!is_blank]
  srt_chunks <- unname(split(srt_file[!is_blank], chunk_id))

  if (length(srt_chunks) == 0L) {
    return(character(0))
  }

  # Within a chunk: 1st line = index number, 2nd line = time stamp.
  # Sort on the time-stamp text; chunks too short to have one give NA and are
  # placed last by order(). Ties keep their original relative order.
  time_stamps <- vapply(srt_chunks, function(x) x[2], character(1))
  srt_chunks <- srt_chunks[order(time_stamps)]

  # Rebuild each chunk as: blank separator, new index number, remaining lines.
  fixed_chunks <- lapply(seq_along(srt_chunks), function(i) {
    c("", as.character(i), srt_chunks[[i]][-1])
  })

  # Flatten and drop the separator that precedes the very first chunk.
  unlist(fixed_chunks)[-1]
}
# Collect the .srt files to repair: full paths, top level of the input
# directory only. The pattern anchors on a literal dot so that only a real
# ".srt" extension matches (a bare "srt$" also matches e.g. "notes_srt").
file_list <- list.files("original_srt", pattern = "\\.srt$",
                        recursive = FALSE, full.names = TRUE)
outdir <- "fixed_srt/"
# writeLines() does not create missing directories, so make sure the output
# directory exists before the loop starts (no-op if it is already there).
dir.create(outdir, showWarnings = FALSE)

# Repair every file and write the result under the same base name in `outdir`,
# printing "here goes <name> => <output path>" as progress.
for (f in file_list) {
  cat("here goes", basename(f))
  # warn = FALSE silences only readLines()' own "missing final EOL / embedded
  # nul" notices instead of hiding every warning raised while reading.
  srt_file <- readLines(f, warn = FALSE)
  fixed <- fix_srt(srt_file)
  out_name <- paste0(outdir, basename(f))
  cat(" =>", out_name, "\n")
  writeLines(fixed, out_name)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment