Skip to content

Instantly share code, notes, and snippets.

@tansautn
Created September 15, 2020 00:45
Show Gist options
  • Save tansautn/b7e45a312ade395670f97bc5e125fd7e to your computer and use it in GitHub Desktop.
Save tansautn/b7e45a312ade395670f97bc5e125fd7e to your computer and use it in GitHub Desktop.
Python: convert an IBM Watson Speech to Text JSON response to SRT
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. http://www.gnu.org/licenses/
import json
import math
# function to format time for a subtitle file (SRT, VTT or STL)
def format_time(time, format):
    """Format a time offset in seconds as a subtitle timestamp.

    Args:
        time: Offset from the start of the media in seconds (int or float).
        format: "srt" for ``HH:MM:SS,mmm``; "vtt" or "stl" for
            ``HH:MM:SS.mmm``.

    Returns:
        The timestamp string, with zero-padded fields and exactly three
        millisecond digits.

    Raises:
        ValueError: If ``format`` is not "srt", "vtt" or "stl".
    """
    if format == "srt":
        separator = ","
    elif format in ("vtt", "stl"):
        separator = "."
    else:
        raise ValueError("unsupported subtitle format: %r" % (format,))
    # Work in whole milliseconds instead of slicing str(time): that approach
    # fails for ints and exponent reprs (no "." to split on) and emits too
    # many digits for values with more than three decimals. Rounding first
    # also lets 59.9996 carry over into the next minute correctly.
    total_ms = int(round(time * 1000))
    total_sec, ms = divmod(total_ms, 1000)
    total_min, sec = divmod(total_sec, 60)
    hr, minute = divmod(total_min, 60)
    return "%02d:%02d:%02d%s%03d" % (hr, minute, sec, separator, ms)
def srt_time(self):
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        self: Offset in seconds (int or float). Despite the name this is a
            plain number, not an instance.

    Returns:
        The timestamp as ``HH:MM:SS,mmm`` with exactly three millisecond
        digits.
    """
    # Derive milliseconds arithmetically; splitting str(self) on "." breaks
    # for ints and yields more than three digits for long fractions.
    total_ms = int(round(self * 1000))
    total_sec, ms = divmod(total_ms, 1000)
    total_min, sec = divmod(total_sec, 60)
    hr, minute = divmod(total_min, 60)
    return "%02d:%02d:%02d,%03d" % (hr, minute, sec, ms)
# function to format time for VTT file
def vtt_time(self):
    """Format a time offset in seconds as a WebVTT timestamp.

    Args:
        self: Offset in seconds (int or float). Despite the name this is a
            plain number, not an instance.

    Returns:
        The timestamp as ``HH:MM:SS.mmm`` with exactly three millisecond
        digits.
    """
    # Derive milliseconds arithmetically; splitting str(self) on "." breaks
    # for ints and yields more than three digits for long fractions.
    total_ms = int(round(self * 1000))
    total_sec, ms = divmod(total_ms, 1000)
    total_min, sec = divmod(total_sec, 60)
    hr, minute = divmod(total_min, 60)
    return "%02d:%02d:%02d.%03d" % (hr, minute, sec, ms)
# function to format time for STL file
def stl_time(self):
    """Format a time offset in seconds as an STL (QTtext) timestamp.

    Args:
        self: Offset in seconds (int or float). Despite the name this is a
            plain number, not an instance.

    Returns:
        The timestamp as ``HH:MM:SS.cc`` with exactly two fractional digits.
    """
    # Two fractional digits = hundredths of a second.
    # NOTE(review): chosen to match the "{timescale:100}" STL header defined
    # in this file -- confirm against the QTtext consumer.
    # Computed arithmetically because splitting str(self) on "." breaks for
    # ints and lets three-or-more-digit fractions through unchanged.
    total_cs = int(round(self * 100))
    total_sec, cs = divmod(total_cs, 100)
    total_min, sec = divmod(total_sec, 60)
    hr, minute = divmod(total_min, 60)
    return "%02d:%02d:%02d.%02d" % (hr, minute, sec, cs)
# Load JSON from Watson (replace ./result.json with the path to your JSON file)
# Read the whole response up front; the context manager closes the file
# instead of leaving the handle open for the rest of the run.
with open('./result.json', encoding='utf-8') as json_file:
    str_data = json_file.read()
try:
    json_data = json.loads(str_data)
except ValueError:
    # json.JSONDecodeError is a ValueError subclass. Catching only that keeps
    # unrelated failures (e.g. KeyboardInterrupt) from being misreported as
    # malformed JSON.
    print ("ERROR: The JSON is not formatted properly.")
    # Exit with a non-zero status so callers can detect the failure.
    raise SystemExit(1)
# Convert the parsed Watson response (json_data) into subtitles.srt.
# Only SRT output is enabled. The VTT/STL/SCC settings below are not used by
# this script and are kept for anyone who adds those writers.

# starts subtitle id counter for SRT file
sub_id = 0
# VTT display format
display_vtt = "align:middle line:84%"
# header for STL .txt file
stl_header = """{QTtext}{timescale:100}{textBox: 0, 0, 45, 0}{font:Arial}{size:16}{backColor:0,0,0}
{textColor:65535,65535,65535}{width:640}{height:40}{justify:Center}\n\n\n"""
# header for SCC .scc file
scc_header = "Scenarist_SCC V1.0\n\n"

# open the SRT file in write mode (overwrites if it already exists); the
# context manager closes it even if processing aborts part-way through
with open('subtitles.srt', 'w', encoding='utf-8') as f_srt:
    # iterate through JSON array
    for result in json_data["results"]:
        # Skip results that are missing "final" or carry "final": false.
        # A plain key-presence test would let non-final results through.
        if not result.get("final"):
            continue
        # Only the first alternative becomes subtitles. The earlier version
        # reused its outer loop variable for the write loop, which made it
        # stop after the first alternative anyway; this makes that explicit.
        alternatives = result["alternatives"]
        if not alternatives:
            continue
        alternative = alternatives[0]
        # ignores entry if Watson confidence is 0.00
        if alternative.get("confidence") == 0.00:
            continue

        transcript = alternative["transcript"]
        # checks to see if transcript is long (over 60 chars)
        long_tran = len(transcript) > 60
        if long_tran:
            # calculate how many characters allowed per subtitle so the
            # pieces come out roughly equal in length
            max_chars = len(transcript) / math.ceil(len(transcript) / 60.0)

        try:
            # Each timestamp entry is read as [word, start, end]. float()
            # normalises times that the JSON carries as integers.
            words = [(str(entry[0]), float(entry[1]), float(entry[2]))
                     for entry in alternative["timestamps"]]
        except (KeyError, IndexError, TypeError, ValueError):
            print ('ERROR: Cannot find timestamps in JSON. Please ensure word timestamps are enabled in Watson.')
            raise SystemExit(1)

        # records transcript lines with start and end times
        tran_list = []
        custom_tran = ""
        # None (not 0) marks "no word seen yet", so a subtitle that really
        # starts at 0.0 keeps that start time.
        tran_start = None
        tran_end = 0
        for word, word_start, word_end in words:
            custom_tran = custom_tran + "%s " % word
            if tran_start is None or word_start < tran_start:
                tran_start = word_start
            if word_end > tran_end:
                tran_end = word_end
            if long_tran and len(custom_tran) > max_chars:
                sub_id += 1
                tran_list.append([sub_id, custom_tran.rstrip(), tran_start, tran_end])
                custom_tran = ""
                tran_start = None
                tran_end = 0
        # Flush the remaining words. Skipped when the last word already
        # closed a chunk, which would otherwise emit an empty 0 --> 0 entry.
        if custom_tran:
            sub_id += 1
            tran_list.append([sub_id, custom_tran.rstrip(), tran_start, tran_end])

        for entry_id, text, start, end in tran_list:
            # formats time for SRT format
            st_time = format_time(start, "srt")
            en_time = format_time(end, "srt")
            # generates and writes block to SRT file
            block = "%s\n%s --> %s\n%s\n\n" % (entry_id, st_time, en_time, text)
            f_srt.write(block)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment