Convert audio to text using IBM Watson SpeechToText API.
API Reference: https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/?curl#introduction
Each JSON result is stored in a separate file. Results are aggregated in a CSV file.
import json, os
from watson_developer_cloud import SpeechToTextV1
def fwrite(file, value):
    """Write ``value`` to the path ``file``, replacing any existing content.

    Args:
        file: Path of the file to (over)write.
        value: Text to write.
    """
    # The context manager closes the handle even if write() raises.
    with open(file, "w") as fh:
        fh.write(value)
def fappend(file, value):
    """Append ``value`` to the path ``file``, creating the file if needed.

    Args:
        file: Path of the file to append to.
        value: Text to append.
    """
    # The context manager closes the handle even if write() raises.
    with open(file, "a") as fh:
        fh.write(value)
# Watson Speech to Text client. Credentials are read from the WATSONUSER and
# WATSONPASS environment variables.
speech_to_text = SpeechToTextV1(
    username=os.getenv('WATSONUSER'),
    password=os.getenv('WATSONPASS'),
    x_watson_learning_opt_out=False
)

# Transcribe every entry in ./media/audios/. Each raw response is saved to
# ./transcripts/audios/<name>.json and one row per file is appended to
# ./transcripts.csv.
files = os.listdir('./media/audios/')
for file in files:
    print('processing {}'.format(file))
    filename = os.path.splitext(file)[0]

    # An existing JSON result means an earlier run already handled this file.
    if os.path.isfile('./transcripts/audios/{}.json'.format(filename)):
        continue

    try:
        with open('./media/audios/{}'.format(file), 'rb') as audio_file:
            # send request to watson
            # NOTE(review): content type is hard-coded, so every input is
            # presumably an MP3 file -- confirm against the media folder.
            results = speech_to_text.recognize(
                audio=audio_file,
                content_type='audio/mp3',
                timestamps=False,
                word_confidence=False,
                max_alternatives=1
            )

        # save results to json file
        fwrite(
            './transcripts/audios/{}.json'.format(filename),
            json.dumps(results, indent=2)
        )

        # concat the top alternative of every result
        transcript = ''.join(
            result['alternatives'][0]['transcript']
            for result in results['results']
        )

        # add result to csv file; embedded double quotes are doubled so the
        # quoted field stays valid CSV
        fappend(
            './transcripts.csv',
            '{},"{}"\n'.format(filename, transcript.replace('"', '""'))
        )
    except Exception as e:
        # Best effort: report the failure, record an empty transcript for
        # this file, and keep going with the remaining files.
        print('failed to transcribe {}: {}'.format(file, e))
        fappend('./transcripts.csv', '{},""\n'.format(filename))

print('all done!')