Skip to content

Instantly share code, notes, and snippets.

@berinhard
Last active November 12, 2021 15:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save berinhard/7678b841919891225b86552c0978b877 to your computer and use it in GitHub Desktop.
Save berinhard/7678b841919891225b86552c0978b877 to your computer and use it in GitHub Desktop.
Gist to convert Amazon Transcribe's JSON output to SRT format
"""
$ python json_to_srt.py <json_filepath> <output_srt>
Reference: https://aws.amazon.com/blogs/machine-learning/create-video-subtitles-with-translation-using-machine-learning/
"""
import sys
import json
def _storePhrase( phrases, phrase ):
    # Add a finished phrase to `phrases`, making sure every stored phrase is timed.
    if not phrase['words']:
        return
    if 'start_time' in phrase:
        phrases.append(phrase)
    elif phrases:
        # The chunk holds punctuation only, so it has no timing of its own:
        # attach it to the previous phrase instead of emitting an untimed cue.
        phrases[-1]['words'].extend(phrase['words'])
    # Otherwise: punctuation before any spoken word cannot be timed; drop it.


def getPhrasesFromTranscript( ts, maxItems=10 ):
    """Split a Transcribe result into subtitle phrases.

    ts is the already-parsed JSON structure; its items are read from
    ts['results']['items']. Items are grouped in reading order into phrases
    of at most maxItems items (words and punctuation both count).

    Returns a list of dicts with three keys:
      'words'      -- the phrase text as a single string
      'start_time' -- SRT time code of the first spoken word in the phrase
      'end_time'   -- SRT time code of the last spoken word in the phrase

    Raises ValueError if maxItems is smaller than 1, and KeyError if ts does
    not have the expected structure.
    """
    if maxItems < 1:
        raise ValueError("maxItems must be at least 1")

    items = ts['results']['items']
    phrases = []
    phrase = {'words': []}

    print("==> Creating phrases from transcript...")
    for item in items:
        # Punctuation doesn't contain timing information, so only spoken
        # words ("pronunciation" items) contribute to the phrase timing.
        if item["type"] == "pronunciation":
            if "start_time" not in phrase:
                phrase["start_time"] = getTimeCode( float(item["start_time"]) )
            # Items are read sequentially, so the latest word seen is always
            # the end of the phrase so far. The first word counts as well,
            # otherwise a phrase with one spoken word would have no end_time.
            phrase["end_time"] = getTimeCode( float(item["end_time"]) )

        # in either case, append the word to the phrase...
        phrase["words"].append(item['alternatives'][0]["content"])

        # now add the phrase to the phrases and start a new one
        if len(phrase["words"]) == maxItems:
            _storePhrase(phrases, phrase)
            phrase = {'words': []}

    # Keep the last, shorter phrase too; without this the end of the
    # transcript would be missing from the subtitles.
    _storePhrase(phrases, phrase)

    for p in phrases:
        text = ' '.join(p['words'])
        # Joining with spaces leaves a gap before punctuation; close it.
        for mark in (',', '?', '.', '!'):
            text = text.replace(' ' + mark, mark)
        p['words'] = text
    return phrases
def getTimeCode( seconds ):
    """Return seconds (a number) as an SRT time code, "HH:MM:SS,mmm".

    The value is rounded to the nearest millisecond and then split with
    integer arithmetic, so float noise cannot drop a unit (1.001 gives
    ",001") and times of one hour or more carry into the hours field
    instead of producing a minutes value above 59.
    """
    total_millis = int(round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return "%02d:%02d:%02d,%03d" % (hours, mins, secs, millis)
def writeSrt( phrases, fd ):
    """Write phrases to the text stream fd as SRT cues.

    Each phrase is a dict with 'start_time', 'end_time' and 'words'. Cues are
    numbered from 1, as the SubRip format expects, and each one is followed
    by a blank line.
    """
    for number, content in enumerate(phrases, start=1):
        fd.write(f'{number}\n')
        fd.write(f'{content["start_time"]} --> {content["end_time"]}\n')
        fd.write(f'{content["words"]}\n\n')


if __name__ == '__main__':
    # Validate the command line explicitly: an assert is stripped under -O
    # and would not tell the user what the script expects.
    if len(sys.argv) != 3:
        sys.exit(f'Usage: python {sys.argv[0]} <json_filepath> <output_srt>')
    json_filename = sys.argv[1]
    output_filename = sys.argv[2]

    # Read and write as UTF-8 so non-ASCII transcripts do not depend on the
    # platform's default encoding.
    with open(json_filename, encoding='utf-8') as fd:
        data = json.load(fd)
    phrases = getPhrasesFromTranscript(data)
    with open(output_filename, 'w', encoding='utf-8') as fd:
        writeSrt(phrases, fd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment