ecovictoriano/python-watson-stt.md

## python-watson-stt.md

      
    Raw
  

              python-watson-stt.md
            
          
    Transcribe audio file to text (speech-to-text) using IBM's Watson SpeechToText API

Convert audio to text using IBM Watson SpeechToText API.
API Reference: https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/?curl#introduction
Each JSON result is stored in a separate file (`./transcripts/audios/<name>.json`).
Transcripts are aggregated in a CSV file (`./transcripts.csv`), one row per audio file.
import json, os
from watson_developer_cloud import SpeechToTextV1

def fwrite(file, value):
  """Write `value` to the path `file`, replacing any existing content.

  The file is opened in text mode ("w"), so `value` must be a string.
  """
  # the context manager closes the handle even if write() raises
  with open(file, "w") as fh:
    fh.write(value)

def fappend(file, value):
  """Append `value` to the end of the path `file`, creating it if missing.

  The file is opened in text mode ("a"), so `value` must be a string.
  """
  # the context manager closes the handle even if write() raises
  with open(file, "a") as fh:
    fh.write(value)

# Watson Speech to Text client; credentials are read from the WATSONUSER and
# WATSONPASS environment variables.
speech_to_text = SpeechToTextV1(
  username=os.getenv('WATSONUSER'),
  password=os.getenv('WATSONPASS'),
  x_watson_learning_opt_out=False
)

files = os.listdir('./media/audios/')

for file in files:
  print('processing {}'.format(file))
  filename, fileext = os.path.splitext(file)

  # a saved JSON result means this audio file was already processed; skip it
  if os.path.isfile('./transcripts/audios/{}.json'.format(filename)):
    continue

  try:
    with open('./media/audios/{}'.format(file), 'rb') as audio_file:
      # send request to watson
      results = speech_to_text.recognize(
                  audio              = audio_file,
                  content_type       = 'audio/mp3',
                  timestamps         = False,
                  word_confidence    = False,
                  max_alternatives   = 1
              )

      # save results to json file
      fwrite('./transcripts/audios/{}.json'.format(filename), json.dumps(results, indent=2))

      # concat the top alternative of every result segment
      transcript = ''.join(
        result['alternatives'][0]['transcript'] for result in results['results']
      )

      # add result to csv file; embedded double quotes are doubled so the
      # quoted field stays a single valid CSV column
      fappend('./transcripts.csv', '{},"{}"\n'.format(filename, transcript.replace('"', '""')))
  except Exception as e:
    # best-effort batch: report the failure, record an empty transcript row,
    # and carry on with the next file
    print('failed to transcribe {}: {}'.format(file, e))
    fappend('./transcripts.csv', '{},""\n'.format(filename))

print('all done!')