Last active
October 16, 2019 07:32
-
-
Save candale/987b04c910d2d5afb7c17c3475d2a435 to your computer and use it in GitHub Desktop.
check_sub.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Run: python3 check_sub.py path_to.srt

Reports the following:
- error if the first line of a subtitle chunk is not a number (surrounding
  whitespace is ignored)
- error if there are two blank lines between two subtitle chunks
- error if the sync info does not match the standard format, or has spaces
  at the beginning or end
- error if a subtitle chunk has more than three subtitle lines
- warning if a subtitle chunk has no subtitle lines
- warning if a subtitle chunk has three subtitle lines

When an error is given, fix the error and run the script again. If the script
encounters an error, it stops processing the file.
""" | |
import sys | |
import re | |
# Pattern for the sync line: two "h:mm:ss,ms" timestamps joined by " --> ".
# Anchored at both ends so leading or trailing spaces are rejected; ``$`` still
# matches just before the line's final newline.
_TIMING_RE = re.compile(
    r'^\d{1,2}:\d\d{1,2}:\d\d{1,2},\d{1,3}'
    r'\s-->'
    r'\s\d{1,2}:\d\d{1,2}:\d\d{1,2},\d{1,3}$'
)


def check_chunk(chunk_count, start_line, file):
    """Validate one subtitle chunk and return how many lines were read.

    Args:
        chunk_count: Line number of the chunk's index line; only used in the
            error and warning messages.
        start_line: The chunk's first line, which must be a number once
            surrounding whitespace is stripped.
        file: Iterator over the remaining lines. It is advanced past the
            timing line, the subtitle lines and the blank separator line.

    Returns:
        Number of lines pulled from ``file``: the timing line plus every
        subtitle/separator line read.

    Raises:
        AssertionError: if the index line is not a number, the timing line is
            missing or malformed, or the chunk has more than three subtitle
            lines.
    """
    index = start_line.strip()
    # Raise explicitly rather than with ``assert`` so the checks still run
    # under ``python -O``; the exception type stays AssertionError.
    if not index.isdigit():
        raise AssertionError(
            'Something wrong at chunk on line {}. Expected number but got {}'
            .format(chunk_count, index)
        )

    # A file that ends right after the index line has no timing line at all;
    # report that as a timing error instead of leaking StopIteration.
    timing = next(file, None)
    if timing is None or not _TIMING_RE.match(timing):
        raise AssertionError(
            'TIming is wrong at chunk on line {}'.format(chunk_count)
        )

    count = 0
    for sub in file:
        count += 1
        if not sub.strip():
            # A blank line ends the chunk; if it is the very first line after
            # the timing, the chunk carries no text.
            if count == 1:
                print('WARNING: No subtitle for chunk at line {}'.format(chunk_count))
            break
        if count == 3:
            print(
                'WARNING: Three lines of subtitle at chunk on line {}'
                .format(chunk_count)
            )
        if count > 3:
            raise AssertionError(
                'More than 3 lines at chunk on line {}'.format(chunk_count))
    else:
        # Input ended without a blank separator; a chunk that stops right
        # after its timing line is still an empty chunk.
        if count == 0:
            print('WARNING: No subtitle for chunk at line {}'.format(chunk_count))

    # +1 accounts for the timing line.
    return count + 1
# Walk the subtitle file chunk by chunk. `consumed` is the number of lines
# read so far, so `consumed + 1` is the line number of the chunk that starts
# with the current line.
with open(sys.argv[1], 'r') as f:
    consumed = 0
    for line in f:
        chunk_start = consumed + 1
        # check_chunk pulls the rest of the chunk from the same handle and
        # reports how many extra lines it read.
        consumed = chunk_start + check_chunk(chunk_start, line, f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment