Created
November 12, 2014 09:06
-
-
Save mzmttks/763946813e02fda54c55 to your computer and use it in GitHub Desktop.
Windows Movie Maker で作った wlmp ファイルから字幕だけを取り出す。 ref: http://qiita.com/mzmttks/items/993c111d6ddf3a3425bd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script Extractor from Windows Movie Maker XML file
This script takes a wlmp file (Windows Movie Maker project)
as input and extracts the text (TitleClip tag).
Author: mzmttks
License: MIT License
""" | |
import sys

import lxml.etree

# Message written to stderr when the script is run without an input file.
USAGE = (
    "ERROR: INPUT_FILE is not given\n"
    "usage: wlmp2script.py INPUT_FILE\n"
)


def _clip_text(titleclip):
    """Return the caption of one TitleClip element.

    Looks at every descendant ``BoundPropertyStringSet`` whose ``Name``
    attribute is ``"string"`` and joins the non-empty ``Value`` attributes
    of its ``BoundPropertyStringElement`` children with newlines.  Returns
    an empty string when the clip has no such set.
    """
    text = u""
    for strset in titleclip.iterdescendants("BoundPropertyStringSet"):
        # .get() instead of attrib[...] so a set without a Name attribute
        # is skipped rather than aborting the whole run with KeyError.
        if strset.get("Name") != "string":
            continue
        values = (
            elem.get("Value")
            for elem in strset.iterchildren("BoundPropertyStringElement")
        )
        # If a clip holds several "string" sets, the last one wins.
        text = u"\n".join(value for value in values if value)
    return text


def extract_subtitles(root):
    """Return the non-empty subtitles of a parsed wlmp document.

    ``root`` is the lxml root element.  Captions are collected per
    ``TitleClip`` (keyed by its ``extentID`` attribute) and returned in the
    order in which ``ExtentRef`` elements reference them through ``id``.
    """
    texts = {}
    for clip in root.xpath("//TitleClip"):
        extent_id = clip.get("extentID")
        if extent_id is not None:
            texts[extent_id] = _clip_text(clip)

    subtitles = []
    for ref in root.xpath("//ExtentRef"):
        text = texts.get(ref.get("id"))
        if text:
            subtitles.append(text)
    return subtitles


def main(argv):
    """Print the subtitles of the wlmp file named in ``argv[1]``.

    Returns the process exit status: 1 when no input file was given,
    0 otherwise.
    """
    if len(argv) < 2:
        sys.stderr.write(USAGE)
        return 1

    # Let lxml open the file itself: it decodes according to the XML
    # declaration, and nothing is inserted between the lines of the file.
    root = lxml.etree.parse(argv[1]).getroot()

    # Always emit UTF-8 bytes.  Python 3 exposes the byte stream as
    # sys.stdout.buffer; on Python 2 sys.stdout already accepts bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for text in extract_subtitles(root):
        out.write(text.encode("utf-8") + b"\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment