Skip to content

Instantly share code, notes, and snippets.

@bibby
Created January 25, 2017 06:44
Show Gist options
  • Save bibby/b4bec314983522e96ab9cc38faba798f to your computer and use it in GitHub Desktop.
Save bibby/b4bec314983522e96ab9cc38faba798f to your computer and use it in GitHub Desktop.
ass UTF-16 subfile to UTF-8 plain text
# rfw/python-ass does the heavy lifting,
# but I needed to hack around UTF-16 encoded files,
# saving them back as UTF-8
import ass
import re
import sys
# Brace-delimited override blocks embedded in event text, e.g. "{\i1}".
font_pattern = re.compile(r'\{[^\}]+\}')

# Subtitle extensions that get swapped for ".txt" (matched case-insensitively).
_subtitle_ext = re.compile(r'\.(?:ass|ssa)$', re.IGNORECASE)


def derive_output_path(in_file):
    """Return the plain-text output path for the subtitle file *in_file*.

    A trailing ".ass" or ".ssa" extension (any letter case) is replaced by
    ".txt". Any other name gets ".txt" appended, so the result is never the
    input path itself and the source file cannot be overwritten.
    """
    out_file = _subtitle_ext.sub('.txt', in_file)
    if out_file == in_file:
        out_file = in_file + '.txt'
    return out_file


def clean_event_text(text):
    r"""Return the text of one subtitle event as plain text.

    The literal two-character marker \N becomes a real newline, every
    brace-delimited override block is removed, runs of spaces are collapsed
    to a single space, and surrounding whitespace is stripped.
    """
    # Raw string: in a normal literal "\N" starts a named-unicode escape on
    # Python 3 and does not compile.
    text = text.replace(r'\N', '\n')
    text = font_pattern.sub('', text)
    # NOTE(review): the original substitution replaced a single space with a
    # single space (a no-op); collapsing runs of spaces left behind by the
    # removed override blocks is presumably what was intended - confirm.
    text = re.sub(r' {2,}', ' ', text)
    return text.strip()


def convert(in_file, out_file=None):
    """Write the plain text of every event in a UTF-16 ASS file as UTF-8.

    Args:
        in_file: path of the UTF-16 encoded subtitle file to read.
        out_file: path of the UTF-8 text file to write; derived from
            *in_file* with derive_output_path() when omitted.

    Returns:
        The path of the text file that was written.

    Each cleaned event is also echoed to standard output.
    """
    if out_file is None:
        out_file = derive_output_path(in_file)

    # Decoding while reading removes the need for a UTF-8 scratch copy in a
    # fixed temp path, which concurrent runs would overwrite.
    with open(in_file, encoding='utf-16') as source_file:
        doc = ass.parse(source_file)

    # The output is opened only after parsing succeeded, so a bad input does
    # not leave an empty text file behind.
    with open(out_file, 'w', encoding='utf-8') as final_file:
        for event in doc.events:
            text = clean_event_text(event.text)
            final_file.write(text)
            final_file.write('\n')
            print(text)
    return out_file


def main(argv=None):
    """Command-line entry point: convert the file named by the first argument.

    Returns the process exit status (0 on success, 2 on a usage error).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'ass2txt'
        sys.stderr.write('usage: {} SUBTITLE_FILE\n'.format(prog))
        return 2
    convert(argv[1])
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment