Skip to content

Instantly share code, notes, and snippets.

@benosteen
Created April 5, 2017 13:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benosteen/d5b3e3a8a83d43ff277f2213c7c77c08 to your computer and use it in GitHub Desktop.
Save benosteen/d5b3e3a8a83d43ff277f2213c7c77c08 to your computer and use it in GitHub Desktop.
from xml.etree import ElementTree as ET
import re
def format_t(time_string, frac_sep=".", output_frag_sep="."):
    """Normalise a clock-style time string to ``HH:MM:SS<sep>mmm``.

    The input may be ``S``, ``M:S`` or ``H:M:S``; the seconds field may
    carry a fractional part introduced by *frac_sep* (for example
    ``"1:02.5"``).  Field values are zero-padded but not range-checked, so
    ``"75"`` yields ``"00:00:75.000"``.

    Args:
        time_string: The time to normalise.
        frac_sep: Separator between whole and fractional seconds in the
            input.
        output_frag_sep: Separator written between the seconds and the
            milliseconds in the result.

    Returns:
        The formatted time, e.g. ``"00:01:02.500"``.

    Raises:
        ValueError: If there are more than three ``:``-separated fields,
            the seconds field contains *frac_sep* more than once, or a
            field is not numeric.
    """
    fields = time_string.split(":")
    if len(fields) > 3:
        raise ValueError(
            "Too many ':'-separated fields in time string: {0!r}".format(time_string)
        )

    sec_parts = fields[-1].split(frac_sep)
    if len(sec_parts) > 2:
        raise ValueError(
            "More than one fractional separator in time string: {0!r}".format(time_string)
        )

    h = m = ms = 0
    s = int(sec_parts[0])
    if len(sec_parts) == 2:
        # Prefixing "0." makes "2.1" mean 100 ms (2.100) rather than 1 ms.
        ms = int(float("0." + sec_parts[1]) * 1000)

    if len(fields) == 2:
        m = int(fields[0])
    elif len(fields) == 3:
        h = int(fields[0])
        m = int(fields[1])

    return "{0:02d}:{1:02d}:{2:02d}{3}{4:03d}".format(h, m, s, output_frag_sep, ms)
def assert_time_format(time_string):
    """Return True if *time_string* is exactly ``HH:MM:SS.mmm`` or ``HH:MM:SS,mmm``.

    The whole string must match: two-digit hours, minutes and seconds, a
    ``.`` or ``,`` separator, and exactly three millisecond digits.  Any
    leading or trailing extra characters make the check fail.

    Args:
        time_string: The candidate timestamp.

    Returns:
        ``True`` when the string has the expected shape, ``False`` otherwise.
    """
    # fullmatch (rather than match) so trailing characters are rejected too.
    return re.fullmatch(r"\d{2}:\d{2}:\d{2}[.,]\d{3}", time_string) is not None
def convert_to_srt(xmlfile, srtfile):
    """Convert the annotations of an XML file into numbered SubRip-style cues.

    Every ``annotations/annotation`` element that has a ``TEXT`` child with
    text and exactly two ``segment/movingRegion/rectRegion`` descendants
    becomes one cue: the ``t`` attributes of the two regions are formatted
    as the start and end times and the ``TEXT`` content is the cue body.
    Annotations that do not fit this shape are reported on stdout and
    skipped.  Cues are numbered from 1 and separated by a blank line.

    Args:
        xmlfile: Path of the XML annotation file to read.
        srtfile: Path of the subtitle file to write; it is overwritten and
            written as UTF-8.
    """
    # Parse before opening the output so a broken XML file does not leave a
    # truncated subtitle file behind; ET.parse also honours the encoding
    # declared in the XML prolog.
    doc = ET.parse(xmlfile).getroot()

    with open(srtfile, "w", encoding="utf-8") as dstf:
        anno_count = 1
        for anno in doc.findall("annotations/annotation"):
            text_e = anno.find("TEXT")
            # A missing TEXT element is treated the same as an empty one.
            text = text_e.text if text_e is not None else None
            t_rects = anno.findall("segment/movingRegion/rectRegion")

            if text is None or len(t_rects) != 2:
                print("Annotation found, but couldn't parse it for some reason:")
                if text is None:
                    print("No text found in the TEXT element")
                if len(t_rects) != 2:
                    print("Expected exactly 2 rect regions, found {0}".format(len(t_rects)))
                continue

            # NOTE(review): SubRip conventionally puts ',' before the
            # milliseconds; format_t's default emits '.' here - confirm the
            # target players accept it.
            time1 = format_t(t_rects[0].attrib["t"])
            time2 = format_t(t_rects[1].attrib["t"])
            if not (assert_time_format(time1) and assert_time_format(time2)):
                # Warn only; the cue is still written with the times as formatted.
                print("Time is formatted incorrectly for annotation {0}:".format(anno_count))
                print("time1: {0}".format(time1))
                print("time2: {0}".format(time2))

            # Blank line between cues, but not before the first one.
            if anno_count > 1:
                dstf.write("\n")
            dstf.write("{0}\n".format(anno_count))
            dstf.write("{0} --> {1}\n".format(time1, time2))
            dstf.write(text)
            dstf.write("\n")
            anno_count += 1
if __name__ == "__main__":
    import sys

    # Exactly two arguments are required: the source XML and the target SRT.
    if len(sys.argv) != 3:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # so a bad invocation is visible to calling scripts.
        sys.exit("Usage: python convert_to_srt.py xmlfilename.xml srtfilename.srt")

    convert_to_srt(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment