Skip to content

Instantly share code, notes, and snippets.

@miracle2k
Created February 22, 2019 15:17
Show Gist options
  • Save miracle2k/617a9b7d24f2ee9cd6e7c38b1dad3444 to your computer and use it in GitHub Desktop.
Save miracle2k/617a9b7d24f2ee9cd6e7c38b1dad3444 to your computer and use it in GitHub Desktop.
Convert the output from Google Cloud Speech Recognition to ELRC.
"""
Convert the output from Google Cloud Speech Recognition to ELRC.
"""
import json, sys
class ELRC:
    """Accumulate words into lines for Enhanced LRC (ELRC) output.

    ``lines`` is a list of lines; each line is a list holding the word
    objects passed to :meth:`add_word`, in insertion order. The class does
    not inspect the word objects themselves.
    """

    def __init__(self, max_words_per_line=20):
        """Create an empty document.

        Args:
            max_words_per_line: Number of words a line may hold before
                :meth:`add_word` automatically starts a new line.
        """
        self.lines = []
        self.max_words_per_line = max_words_per_line

    def add_line(self):
        """Start a new line, unless the current last line is still empty."""
        if not self.lines or self.lines[-1]:
            self.lines.append([])

    def add_word(self, word):
        """Append ``word`` to the current line, wrapping first if it is full.

        A line is created on demand, so calling this before any
        :meth:`add_line` is safe.
        """
        # Wrap *before* appending and always append to the line that is last
        # afterwards; holding on to the old line would put the overflow word
        # on the full line and leave an empty line dangling at the end.
        if not self.lines or len(self.lines[-1]) >= self.max_words_per_line:
            self.add_line()
        self.lines[-1].append(word)
def _format_timestamp(duration):
    """Format a seconds string such as ``"83.532s"`` as ``mm:ss.xxx``.

    The input is expected to be a decimal number of seconds with a trailing
    ``s`` unit suffix (e.g. ``"0s"``, ``"1.500s"``).
    """
    seconds = float(duration.rstrip("s"))
    # Work in whole milliseconds so the three fractional digits never carry
    # a float representation error into the output.
    total_ms = round(seconds * 1000)
    minutes, rest_ms = divmod(total_ms, 60000)
    secs, millis = divmod(rest_ms, 1000)
    return f"{minutes:02d}:{secs:02d}.{millis:03d}"


def convert(response):
    """
    Convert a speech recognition response to ELRC text.

    ELRC looks like this:
    [00:00.000] sdfasdf asdf <00:01.532> asd <00:01.884> fas <00:02.566> dfa sdf asdf asdf as <00:02.951> dfasdfa
    [00:03.180] asdf asdf asdf asdf

    Args:
        response: Parsed JSON dict. Either the recognition response itself
            (with a ``results`` list) or an object wrapping it under the
            ``response`` key.

    Returns:
        The ELRC document as a single string, one line of lyrics per text
        line. Every line starts with ``[mm:ss.xxx]`` (the start time of its
        first word) and every word is preceded by its own ``<mm:ss.xxx>``
        tag. Returns an empty string when there are no words.
    """
    elrc = ELRC()
    # Unwrap the ``response`` envelope when present; otherwise treat the
    # argument as the response itself.
    response = response.get('response', response)
    for result in response.get('results', []):
        # Between each result there is usually a small pause (it seems!), so start a new line on each.
        elrc.add_line()
        # Only the first alternative is used; a result without alternatives
        # or without word timings simply contributes no words.
        alternatives = result.get('alternatives') or [{}]
        for word in alternatives[0].get('words', []):
            elrc.add_word(word)

    # Format as text
    lines_text = []
    for line in elrc.lines:
        if not line:
            # A result without words leaves an empty line behind; it has no
            # start time to print, so skip it instead of failing on line[0].
            continue
        line_text = [f'[{_format_timestamp(line[0]["startTime"])}]']
        line_text.extend(
            f'<{_format_timestamp(word["startTime"])}> {word["word"]}'
            for word in line
        )
        lines_text.append(" ".join(line_text))
    return "\n".join(lines_text)
def main(argv=None):
    """Read a recognition response JSON file and print it as ELRC.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first argument is the path of the JSON
            file to convert.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f"usage: {sys.argv[0]} RESPONSE.json")
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default encoding, which can mis-decode non-ASCII transcripts.
    with open(args[0], encoding="utf-8") as f:
        content = json.load(f)
    print(convert(content))


# Run the conversion only when executed as a script, so the module can be
# imported without side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment