Skip to content

Instantly share code, notes, and snippets.

@royshil
Last active December 30, 2021 14:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save royshil/3c62fb0d72d7ec083f488425da314bb0 to your computer and use it in GitHub Desktop.
Save royshil/3c62fb0d72d7ec083f488425da314bb0 to your computer and use it in GitHub Desktop.
Google Cloud Speech JSON to .srt converter script, with timestamp preservation and support for multiple files
#!/usr/local/bin/python3
import os
import json
import sys
import argparse
import math
import subprocess
from tqdm import tqdm
# Command-line interface: one or more transcript JSON files plus two optional
# boolean switches.
parser = argparse.ArgumentParser(
    description='Convert google ml speech .json to .srt subtitles.')
parser.add_argument(
    'files', metavar='F', type=str, nargs='+',
    help='json files for converting')
# 'store_true' gives the same result as store_const/const=True/default=False:
# the attribute is False unless the flag is present on the command line.
parser.add_argument(
    '--concat', dest='concat', action='store_true',
    help='concatenate the subtitles to a single .srt output file')
parser.add_argument(
    '--fix_timestamps', '-t', dest='fix_timestamps', action='store_true',
    help='fix timestamps of consecutive files')
# Parsed at module level; the rest of the script reads args.files,
# args.concat and args.fix_timestamps.
args = parser.parse_args()
def mktime(x):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        x: Offset in seconds (int or float).

    Returns:
        The timestamp string with zero-padded hours, minutes and seconds and
        a comma before the three-digit millisecond field.
    """
    # Work in whole milliseconds so rounding happens exactly once; formatting
    # the float seconds directly could print "60,000" without carrying into
    # the minutes field, and did not zero-pad seconds below 10.
    total_ms = int(round(x * 1000))
    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
    seconds, millis = divmod(remainder_ms, 1000)
    return "%02d:%02d:%02d,%03d" % (hours, minutes, seconds, millis)
# Destination for subtitle text. In --concat mode a single .srt, named after
# the first input file, receives the subtitles of every input.
output_file = None
if args.concat and args.files:
    output_filename = os.path.splitext(args.files[0])[0] + '.srt'
    # Explicit UTF-8 so non-ASCII transcript text does not depend on the
    # platform's default encoding. The handle is deliberately kept open:
    # write_sub() writes to it for the remainder of the script.
    output_file = open(output_filename, 'w', encoding='utf-8')
# Running subtitle number; write_sub() increments it before each cue.
sub_id = 0
# A single subtitle cue: sequence number, "start --> end" time range, text,
# and a terminating blank line (SRT separates consecutive cues with one).
sub_line_template = """%d
%s --> %s
%s

"""
def write_sub(sub, sub_start_time, sub_end_time):
    """Write one numbered subtitle cue to the current ``output_file``.

    Args:
        sub: List of word strings; they are joined with single spaces.
        sub_start_time: Cue start, in seconds.
        sub_end_time: Cue end, in seconds.

    Side effects:
        Increments the module-level ``sub_id`` counter and writes the
        formatted cue to the module-level ``output_file``.
    """
    global sub_id
    text = ' '.join(sub)
    sub_id += 1
    start_stamp = mktime(sub_start_time)
    # mktime() already emits a comma separator; the extra replace is kept
    # as a harmless safeguard.
    end_stamp = mktime(sub_end_time).replace('.', ',')
    cue = sub_line_template % (sub_id, start_stamp, end_stamp, text)
    output_file.write(cue)
# A cue is closed as soon as it holds this many words, or as soon as the next
# word would start this many seconds after the cue's first word.
MAX_WORDS_PER_SUB = 10
MAX_SECONDS_PER_SUB = 10


def _parse_seconds(stamp):
    """Convert a duration string with a trailing 's' (e.g. "1.300s") to float seconds."""
    return float(stamp.rstrip('s'))


def _probe_start_timestamp(media_file):
    """Return the timestamp (seconds) of the first frame ffprobe reports for media_file.

    Raises:
        RuntimeError: if ffprobe printed no parseable timestamp (file missing,
            unreadable, or the requested field is unknown to this ffprobe).
    """
    # 'pkt_pts_time' is the field the script has always requested.
    # NOTE(review): newer ffprobe builds appear to expose the same value as
    # 'pts_time' instead - confirm against the ffprobe version in use.
    for field in ('pkt_pts_time', 'pts_time'):
        probe = subprocess.run(
            ["ffprobe", "-i", media_file, "-show_frames", "-show_entries",
             "frame=" + field, "-of", "csv=p=0", "-hide_banner", "-v", "0"],
            capture_output=True, text=True)
        for line in probe.stdout.splitlines():
            try:
                return float(line.strip())
            except ValueError:
                # blank line or a non-numeric value; keep looking
                continue
    raise RuntimeError(
        "ffprobe returned no frame timestamp for %r; check that the file "
        "exists and that ffprobe can read it" % media_file)


def _write_subtitles(results, start_timestamp):
    """Split each result's words into cues and emit them through write_sub().

    Args:
        results: List of result dicts; the first entry of each result's
            'alternatives' list is expected to carry a 'words' list whose
            items have 'word', 'startTime' and 'endTime'.
        start_timestamp: Offset in seconds added to every word time.
    """
    for result in results:
        alternatives = result.get('alternatives') or [{}]
        words = alternatives[0].get('words')
        if not words:
            # nothing to time for this result; previously this crashed on words[0]
            continue
        sub = []
        sub_start_time = sub_end_time = 0.0
        for word in words:
            word_start = _parse_seconds(word['startTime']) + start_timestamp
            word_end = _parse_seconds(word['endTime']) + start_timestamp
            cue_is_full = (len(sub) >= MAX_WORDS_PER_SUB
                           or word_start - sub_start_time >= MAX_SECONDS_PER_SUB)
            if sub and cue_is_full:
                # write the cue without this word; the word opens the next cue
                write_sub(sub, sub_start_time, sub_end_time)
                sub = []
            if not sub:
                sub_start_time = word_start
            sub.append(word['word'])
            sub_end_time = word_end
        # write out the remaining words of the result
        write_sub(sub, sub_start_time, sub_end_time)


# Convert every input file. Without --concat each input gets its own .srt
# (numbering restarts at 1); with --concat all cues go to the shared file.
try:
    for file_name in tqdm(args.files):
        with open(file_name, 'r', encoding='utf-8') as in_file:
            trans_json = json.load(in_file)
        if not args.concat:
            output_filename = os.path.splitext(file_name)[0] + '.srt'
            output_file = open(output_filename, 'w', encoding='utf-8')
            sub_id = 0
        try:
            # sometimes the results are in /results and sometimes in /response/results
            if 'results' in trans_json:
                results_element = trans_json['results']
            else:
                results_element = trans_json.get('response', {}).get('results', [])
            start_timestamp = 0
            if args.fix_timestamps:
                # the audio is looked up next to the JSON, with a .flac extension
                flac_file = os.path.splitext(file_name)[0] + '.flac'
                start_timestamp = _probe_start_timestamp(flac_file)
            _write_subtitles(results_element, start_timestamp)
        finally:
            if not args.concat:
                output_file.close()
finally:
    # the shared --concat output is closed once, after the last input
    if args.concat and output_file is not None:
        output_file.close()
@shadyslim2018
Copy link

When I execute the code I receive an error :

Traceback (most recent call last):
  File "google_cloud_speech_json_to_srt.py", line 78, in <module>
    start_timestamp = float(p.stdout.splitlines()[0]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment