Skip to content

Instantly share code, notes, and snippets.

@phondanai
Last active December 9, 2016 08:04
Show Gist options
  • Save phondanai/8db25921726dbcef9eb5f2c42b0cb53b to your computer and use it in GitHub Desktop.
Save phondanai/8db25921726dbcef9eb5f2c42b0cb53b to your computer and use it in GitHub Desktop.
Remove HTML tags (e.g. color hex codes) from a SubRip (.srt) subtitle file
#!/usr/bin/env python3
import re
import sys
import os
def show_help():
    """Print a two-line usage summary for this script to standard output.

    The second line shows the program name exactly as it was invoked
    (``sys.argv[0]``) followed by the accepted arguments. The output file
    is shown in square brackets because the script also runs with only an
    input file, in which case the cleaned text is printed instead of saved.
    """
    print("Usage:")
    print(sys.argv[0] + " <input_srt_file> [output_srt_file]")
# One markup tag: "<", any run of characters other than ">" (newlines
# included), then the closing ">". This matches the same spans as the lazy
# "<(.|\n)*?>" form without per-character alternation.
TAG_PATTERN = re.compile(r"<[^>]*>")


def strip_tags(sub_txt):
    """Remove every ``<...>`` tag from *sub_txt*.

    Args:
        sub_txt: Full text of a subtitle file.

    Returns:
        A ``(cleaned_text, tag_count)`` tuple, where ``tag_count`` is the
        number of tags that were removed (0 when the text had none).
    """
    return TAG_PATTERN.subn("", sub_txt)


def main(argv):
    """Strip tags from the subtitle file named on the command line.

    Args:
        argv: Argument list laid out like ``sys.argv``: program name, input
            file, and an optional output file. Without an output file the
            cleaned text is printed to standard output.

    Returns:
        Process exit status: 0 on success (including an input that contains
        no tags), 1 when the input cannot be found or the output cannot be
        written, 2 when the number of arguments is wrong.
    """
    # Exactly one or two file arguments are accepted; anything else,
    # including no arguments at all, is a usage error.
    if len(argv) not in (2, 3):
        show_help()
        return 2

    unclean_file = argv[1]
    cleaned_file = argv[2] if len(argv) == 3 else None

    # Only the read is guarded here, so a failure to open the output file
    # can never be reported as a missing subtitle file.
    try:
        with open(unclean_file, 'r') as f:
            sub_txt = f.read()
    except FileNotFoundError:
        # Errors go to stderr so they never mix with subtitle text that the
        # caller may be redirecting from stdout.
        print("ERROR: Subtitle file not found", file=sys.stderr)
        return 1

    clean_sub_txt, tag_count = strip_tags(sub_txt)
    if not tag_count:
        print("{} is cleaned".format(unclean_file))
        return 0

    if not cleaned_file:
        print(clean_sub_txt)
        return 0

    try:
        with open(cleaned_file, 'w') as w:
            w.write(clean_sub_txt)
    except OSError:
        # Covers permission problems as well as a missing or invalid
        # destination directory.
        print("ERROR: Can not write file {}".format(cleaned_file),
              file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment