Skip to content

Instantly share code, notes, and snippets.

@mzmttks
Created November 12, 2014 09:06
Show Gist options
  • Save mzmttks/763946813e02fda54c55 to your computer and use it in GitHub Desktop.
Save mzmttks/763946813e02fda54c55 to your computer and use it in GitHub Desktop.
Windows Movie Maker で作った wlmp ファイルから字幕だけを取り出す。 ref: http://qiita.com/mzmttks/items/993c111d6ddf3a3425bd
"""
Script Extractor from Windows Movie Maker XML file
This script takes a wlmp file (a Windows Movie Maker project)
as input and extracts the subtitle text (TitleClip tags).
Author: mzmttks
License: MIT License
"""
import sys
import lxml.etree
USAGE = (
    "ERROR: INPUT_FILE is not given\n"
    "usage: wlmp2script.py INPUT_FILE\n"
)


def read_wlmp(path):
    """Parse the wlmp file at *path* and return its root element.

    The file is read as raw bytes so that the XML parser itself honours
    the encoding declaration; handing lxml an already-decoded string that
    still carries a declaration is rejected on Python 3, and joining the
    lines with spaces (as was done before) alters the document.
    """
    with open(path, "rb") as handle:
        return lxml.etree.fromstring(handle.read())


def extract_clip_text(titleclip):
    """Return the subtitle text stored under one TitleClip element.

    Only ``BoundPropertyStringSet`` descendants whose ``Name`` attribute
    is ``"string"`` are considered.  The non-empty ``Value`` attributes of
    their ``BoundPropertyStringElement`` children are joined with
    newlines.  If several such sets exist, the last one wins.  An empty
    unicode string is returned when there is nothing to extract.
    """
    text = u""
    for strset in titleclip.iter("BoundPropertyStringSet"):
        if strset.get("Name") != "string":
            continue
        values = (elem.get("Value")
                  for elem in strset.findall("BoundPropertyStringElement"))
        # Joining with a unicode separator always yields unicode text,
        # even when every value is empty or plain ASCII.
        text = u"\n".join(value for value in values if value)
    return text


def collect_subtitles(root):
    """Return the non-empty subtitle texts in ExtentRef order.

    Each TitleClip is keyed by its ``extentID``; the result follows the
    document order of the ``ExtentRef`` elements, looking each ``id`` up
    in that mapping.  References without a matching clip, and clips with
    empty text, are skipped.
    """
    texts = {}
    for clip in root.iter("TitleClip"):
        texts[clip.attrib["extentID"]] = extract_clip_text(clip)

    subtitles = []
    for ref in root.iter("ExtentRef"):
        text = texts.get(ref.attrib["id"])
        if text:
            subtitles.append(text)
    return subtitles


def main(argv):
    """Run the extractor; *argv* is the full argument vector.

    Writes each subtitle to standard output as UTF-8, followed by a blank
    line.  Returns the process exit status: 1 when INPUT_FILE is missing,
    0 otherwise.
    """
    try:
        ifile = argv[1]
    except IndexError:
        sys.stderr.write(USAGE)
        return 1

    # open wlmp
    root = read_wlmp(ifile)

    # output subtitles
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for text in collect_subtitles(root):
        out.write(text.encode("utf-8") + b"\n\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment