Last active
November 19, 2019 17:21
-
-
Save arvi1000/e4c2c06ad096d925bacc54981d5987ed to your computer and use it in GitHub Desktop.
script to fix an out-of-order .srt caption file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The relative paths used further down ('original_srt', 'fixed_srt/') are
# resolved against this directory, so it has to exist before the script runs.
setwd('~/Documents/personal/r_stuff/bcc/srt_sort/')
#' Put the cues of an out-of-order .srt caption file back in time order
#'
#' Each cue is expected to be laid out as: index line, timestamp line, one or
#' more text lines, with cues separated from each other by a blank line.
#'
#' @param srt_file Character vector holding the lines of the file (e.g. the
#'   result of `readLines()`).
#' @return Character vector of the re-ordered file: cues sorted by their
#'   timestamp line, renumbered 1..n, and separated by exactly one blank line
#'   (no leading or trailing blank line). `character(0)` when the input holds
#'   no cues.
fix_srt <- function(srt_file) {
  # caption chunks are delimited by a blank line ''.
  # so... add a blank line to the start: every chunk then begins with its own
  # delimiter, and the cumulative sum of blanks so far labels each line with
  # the chunk it belongs to
  srt_file <- c('', srt_file)
  srt_chunks <- split(srt_file, cumsum(srt_file == ''))

  # a chunk that holds nothing but its delimiter comes from a trailing blank
  # line or from several blank lines in a row. It is not a caption; keeping it
  # would make the renumbering step below invent a stray index line for it.
  srt_chunks <- srt_chunks[lengths(srt_chunks) > 1L]
  if (length(srt_chunks) == 0L) {
    return(character(0))
  }

  # in each chunk the 3rd element = time stamp, so we can sort on that
  # (a malformed chunk without one yields NA, which order() puts last)
  time_stamps <- vapply(srt_chunks, function(x) x[3], character(1),
                        USE.NAMES = FALSE)
  srt_chunks <- srt_chunks[order(time_stamps)]

  # reassign index numbers to reflect new order (2nd element of each chunk)
  fixed_chunks <- lapply(seq_along(srt_chunks), function(i) {
    temp_chunk <- srt_chunks[[i]]
    temp_chunk[2] <- as.character(i)
    temp_chunk
  })

  # flatten back into file lines, less the blank line added at the start
  unlist(fixed_chunks)[-1]
}
# get list of .srt files with full paths
# (the dot is escaped and anchored so only a real '.srt' extension matches)
file_list <- list.files('original_srt', pattern = '\\.srt$',
                        recursive = FALSE, full.names = TRUE)
outdir <- 'fixed_srt/'
# writeLines() cannot create a missing directory, so make sure it is there;
# showWarnings = FALSE keeps this quiet when the directory already exists
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)

# process files: read each one, re-order its cues, write it under the same
# file name into outdir
for (f in file_list) {
  cat('here goes', basename(f))
  # warn = FALSE silences only the missing-final-newline / embedded-nul
  # warnings; any other problem while reading is still reported
  srt_file <- readLines(f, warn = FALSE)
  fixed <- fix_srt(srt_file)
  out_name <- paste0(outdir, basename(f))
  cat(' =>', out_name, '\n')
  writeLines(fixed, out_name)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment