# Source: GitHub gist SAPikachu/2367357 by @SAPikachu, created April 12, 2012.
import re
from optparse import OptionParser
import sys
import os
import subprocess
from tempfile import mkstemp
import chardet
# Lower-case names of fonts that build_script() never subsets; they are
# removed from the set of used fonts before any font file is looked up.
BUILTIN_FONTS = [
    "arial",
    "tahoma",
    "simhei",
    "simsun",
    "youyuan",
    # Chinese-language font names.
    "黑体",
    "宋体",
    "幼圆",
]
def scan_fonts(subs):
    r"""Record which fonts each subtitle file references.

    For every entry of *subs* a fresh ``"font_styles"`` dict is stored on the
    entry. It maps a lower-cased, whitespace-stripped font name to the list
    of style names that use that font. Fonts that are only referenced through
    inline ``\fn`` override tags are present with an empty list.

    A ``Style:`` definition is only counted when its style name occurs
    somewhere on at least one ``Dialogue:`` line of the same file.

    Args:
        subs: mapping of file name to a dict whose ``"content"`` key holds
            the decoded subtitle text. Each dict is mutated in place.

    Returns:
        None.
    """
    style_pattern = re.compile(
        r"^\s*Style:\s*(?P<style_name>.+?),(?P<font>.+?),",
        re.I | re.M,
    )
    # The font name runs up to (not including) the next backslash or closing
    # brace. A character class instead of ".+?" keeps a bare "\fn" reset tag
    # from swallowing the following text as a bogus font name, and the
    # lookahead leaves the terminator unconsumed so that back-to-back tags
    # such as "\fnA\fnB" are both seen.
    override_pattern = re.compile(r"\\fn(?P<font>[^\\}\n]+)(?=[\\}])", re.I)

    for sub in subs.values():
        content = sub["content"]
        font_to_style_mapping = {}
        sub["font_styles"] = font_to_style_mapping

        for style_match in style_pattern.finditer(content):
            # Strip so that "Style: Name, Arial," still yields "arial".
            font = style_match.group("font").strip().lower()
            style_name = style_match.group("style_name")
            style_is_used = re.search(
                r"^\s*Dialogue:\s*.*{}.*$".format(re.escape(style_name)),
                content,
                re.I | re.M,
            )
            if not style_is_used:
                continue
            font_to_style_mapping.setdefault(font, []).append(style_name)

        for code_match in override_pattern.finditer(content):
            font = code_match.group("font").strip().lower()
            if font:
                # Inline overrides are not tied to a style: register the font
                # without adding any style name.
                font_to_style_mapping.setdefault(font, [])
def format_output_file_name(format, original_file_name):
    """Build an output file name from a template and an existing path.

    The template *format* is expanded with ``str.format`` and may use these
    fields, all derived from *original_file_name*:

    * ``{dirname}``   - directory part of the path
    * ``{full_name}`` - file name including its extension
    * ``{name}``      - file name without the extension
    * ``{ext}``       - extension, including the leading dot

    Returns the expanded template string.
    """
    base_name = os.path.basename(original_file_name)
    stem, suffix = os.path.splitext(base_name)
    fields = {
        "dirname": os.path.dirname(original_file_name),
        "full_name": base_name,
        "name": stem,
        "ext": suffix,
    }
    return format.format(**fields)
def flatten(x):
    """flatten(iterable) -> list

    Return one flat list holding every element of *x*, descending
    recursively into any element that has an ``__iter__`` attribute.
    ``str`` and ``bytes`` elements are kept whole rather than split up.

    Examples:
    >>> flatten([1, 2, [3, 4], (5, 6)])
    [1, 2, 3, 4, 5, 6]
    >>> flatten([[[1, 2, 3], (42, None)], [4, 5], [6], 7])
    [1, 2, 3, 42, None, 4, 5, 6, 7]
    """
    flat = []
    for item in x:
        is_text = isinstance(item, (str, bytes))
        if is_text or not hasattr(item, "__iter__"):
            # Leaf value: keep it as-is.
            flat.append(item)
            continue
        # Nested iterable: splice in its flattened contents.
        flat += flatten(item)
    return flat
def build_script(
    sub_files, ignored_fonts, output_subfile_format, output_font_format,
    mkvmerge_options_file,
):
    """Print a Windows batch script that subsets the fonts used by subtitles.

    Each subtitle file is read, decoded with the encoding guessed by
    ``chardet`` and scanned with ``scan_fonts()``. For every used font that
    is neither in ``BUILTIN_FONTS`` nor in *ignored_fonts*, batch commands
    invoking ``SSATextRip``, ``SubsetFont`` and ``SSARepFont`` are written to
    standard output, and ``GetFontPath.exe`` is run immediately to locate the
    font file(s). The exact behaviour of those external tools is not visible
    here.

    Args:
        sub_files: iterable of subtitle file paths.
        ignored_fonts: font names to leave alone (matched
            case-insensitively), or None.
        output_subfile_format: template for the copied subtitle file names,
            expanded by ``format_output_file_name()``.
        output_font_format: template for the subset font file names,
            expanded by ``format_output_file_name()``.
        mkvmerge_options_file: path of a file that receives one attachment
            entry per generated font; nothing is written when it is falsy.

    Raises:
        AssertionError: if ``GetFontPath.exe`` prints no file for a font, or
            a reported font file does not exist.
        subprocess.CalledProcessError: if ``GetFontPath.exe`` exits non-zero.
    """
    # scan_fonts() lower-cases every font name, so normalise the ignore list
    # the same way; otherwise e.g. "Arial" would never match "arial".
    ignored_fonts = {name.lower() for name in (ignored_fonts or [])}

    subs = {}
    for sub_path in sub_files:
        with open(sub_path, "rb") as f:
            raw_content = f.read()
        # chardet reports an encoding of None when it cannot make a guess
        # (for instance on empty input); fall back to UTF-8 rather than
        # failing with a TypeError inside decode().
        encoding = chardet.detect(raw_content)["encoding"] or "utf-8"
        subs[sub_path] = {
            "encoding": encoding,
            "content": raw_content.decode(encoding),
        }

    scan_fonts(subs)
    used_fonts = set(flatten([x["font_styles"].keys() for x in subs.values()]))
    # Sorted so that the generated script is identical from run to run.
    used_fonts = sorted(used_fonts - set(BUILTIN_FONTS) - ignored_fonts)

    # Only the name is needed: the generated script creates and deletes the
    # file itself. Close the descriptor so the handle is not leaked.
    temp_fd, temp_file_name = mkstemp()
    os.close(temp_fd)

    output_stream = sys.stdout

    def out(*args, **kwargs):
        print(*args, file=output_stream, **kwargs)

    # Remove the empty file mkstemp() created; the script appends with ">>".
    out('del "{}" >nul 2>nul'.format(temp_file_name))

    for sub_name, sub in subs.items():
        sub["output_name"] = format_output_file_name(
            output_subfile_format, sub_name
        )
        out('copy /y "{}" "{}"'.format(sub_name, sub["output_name"]))

    font_outputs = []
    for font_name in used_fonts:
        # Append the text rendered with this font, from every subtitle that
        # uses it, to the shared temp file.
        for sub_name, sub in subs.items():
            if font_name not in sub["font_styles"]:
                continue
            styles = sub["font_styles"][font_name]
            out(
                ('SSATextRip "{sub_name}" "i,text,contains,{font_name}" ' +
                 '{style_rules} >> "{temp_file}"').format(
                    sub_name=sub_name,
                    font_name=font_name,
                    temp_file=temp_file_name,
                    style_rules=" ".join(
                        ['"i,style,=,{}"'.format(x) for x in styles]
                    ),
                )
            )

        file_output = subprocess.check_output(
            ["GetFontPath.exe", font_name],
            universal_newlines=True,
        )
        files = file_output.strip().splitlines()
        assert files, "GetFontPath.exe found no file for font {!r}".format(
            font_name
        )

        for file_name in files:
            if not os.path.isabs(file_name):
                # Relative names are resolved against the system font folder.
                file_name = \
                    os.path.expandvars("%SystemRoot%\\Fonts\\" + file_name)
            assert os.path.isfile(file_name), \
                "font file not found: {!r}".format(file_name)

            font_output = format_output_file_name(output_font_format, file_name)
            font_outputs.append(font_output)
            out((r'SubsetFont "{input}" "{output}" "{char_file}" -r | ' +
                 r'SSARepFont {repfont_entries}').format(
                input=file_name,
                output=font_output,
                char_file=temp_file_name,
                repfont_entries=' '.join(
                    ['-f "{0}::{0}"'.format(x["output_name"])
                     for x in subs.values()]
                ),
            ))

        # Start the next font with an empty character file.
        out('del "{}"'.format(temp_file_name))

    if mkvmerge_options_file:
        with open(mkvmerge_options_file, "w", encoding="utf-8") as f:
            for output_file in font_outputs:
                print("--attachment-mime-type", file=f)
                print("application/x-truetype-font", file=f)
                print("--attach-file", file=f)
                # Backslashes are doubled in the options file.
                print(output_file.replace("\\", "\\\\"), file=f)
def parse_args():
    """Parse ``sys.argv`` into keyword arguments for ``build_script()``.

    Returns:
        dict with the keys ``ignored_fonts`` (list of str, or None when the
        option was not given), ``output_subfile_format``,
        ``output_font_format``, ``mkvmerge_options_file`` and ``sub_files``
        (the positional arguments, as a list).
    """
    parser = OptionParser(usage="%prog [options] SUBTITLE_FILE...")
    # "--ignore_font" is the original spelling and is kept for backward
    # compatibility; "--ignore-font" matches the style of the other options.
    parser.add_option(
        "-n", "--ignore-font", "--ignore_font",
        action="append", dest="ignored_fonts",
        help="font name to skip; may be given more than once",
    )
    parser.add_option(
        "-s", "--output-subfile-format", default="{name}.n{ext}",
        help="name template for the copied subtitle files "
             "(default: %default)",
    )
    parser.add_option(
        "-f", "--output-font-format", default="{name}-subset{ext}",
        help="name template for the subset font files (default: %default)",
    )
    parser.add_option(
        "-m", "--mkvmerge-options-file", default="mux_fonts.txt",
        help="file that receives the font attachment options "
             "(default: %default)",
    )
    options, args = parser.parse_args()
    ret = vars(options)
    ret["sub_files"] = args
    return ret
if __name__ == "__main__":
    # Script entry point: the parsed command line maps one-to-one onto the
    # keyword arguments of build_script().
    cli_options = parse_args()
    build_script(**cli_options)
# End of gist SAPikachu/2367357.