heaversm/twilio-transcription-back.js

## twilio-transcription-back.js
//node vars
const express = require('express')
const twilio = require('twilio')
const request = require('request')

// ---- Twilio configuration ----
// Replace the '###' placeholders with the values from your Twilio console.
const accountSid = '###' //your twilio account SID
const authToken = "###" //your twilio auth token
// REST client built from the credentials above; the /list route uses it
const client = twilio(accountSid, authToken)
// Prefix for recording media URLs; the /transcribe route appends "<recording sid>.wav"
const baseURL = 'https://api.twilio.com/2010-04-01/Accounts/[YOUR_ACCOUNT_HERE]/Recordings/'

// ---- Google Cloud Speech configuration ----
const Speech = require('@google-cloud/speech')
const projectId = 'twilio-translated'
const speechClient = Speech({ projectId })
// Recognition settings passed along with the audio to the speech client
const speech_options = {
  encoding: 'LINEAR16',
  sampleRateHertz: 8000,
  languageCode: 'en-US',
  verbose: true,
}

// Express application that all routes below are registered on
let app = express()

// Voice webhook: greets the caller, records a message, then hangs up.
// In the Twilio console, configure incoming calls to use this route (e.g. https://yourserver.com/record).
app.post('/record', (req, res) => {
  // <Record> settings. Twilio's own transcription is left on so its output can be
  // compared with Google's; in your app it can be omitted or set to false.
  const recordOptions = {
    transcribe: true,
    maxLength: 30, //seconds to record
  }

  // Build the TwiML document: spoken prompt, then <Record>, then <Hangup>
  const voiceResponse = new twilio.TwimlResponse()
  voiceResponse.say('Hello. Please leave your message. Press any button to end recording.')
  voiceResponse.record(recordOptions)
  voiceResponse.hangup()

  // Reply to the webhook request with the TwiML serialized as XML
  const xml = voiceResponse.toString()
  res.type('text/xml')
  res.send(xml)
})

// Returns the Twilio recordings and the non-failed Twilio transcriptions as one payload:
//   { recordings: [{ recordingSid }], transcriptions: [{ transcriptionText, recordingSid }] }
// Recordings and transcriptions come from two different Twilio API endpoints, so both
// lookups are started here and the reply is sent once both have completed.
// Responds with HTTP 500 if either lookup reports an error.
app.get('/list', (request, response) => {
  let hasSentData = false //guards against replying more than once
  let pendingLookups = 2 //recordings + transcriptions

  const responseData = {
    recordings: [],
    transcriptions: [],
  }

  // Called once by each lookup when it is done. Completion is tracked with a counter
  // instead of checking whether the other array is non-empty, so an empty list on
  // either side no longer leaves the request without a response.
  const lookupFinished = (err) => {
    if (hasSentData) {
      return
    }
    if (err) {
      hasSentData = true
      console.log(err)
      response.status(500).send('Unable to load recordings from Twilio')
      return
    }
    pendingLookups -= 1
    if (pendingLookups === 0) {
      hasSentData = true
      response.send(responseData)
    }
  }

  client.recordings.list(function (err, data) {
    if (err) {
      lookupFinished(err)
      return
    }
    data.forEach(function (recording) {
      responseData.recordings.push({
        recordingSid: recording.sid,
      })
    })
    lookupFinished()
  })

  client.transcriptions.list(function (err, data) {
    if (err) {
      lookupFinished(err)
      return
    }
    data.forEach(function (transcription) {
      if (transcription.status !== 'failed') { //only return recordings with an actual twilio transcription
        responseData.transcriptions.push({
          transcriptionText: transcription.transcriptionText,
          recordingSid: transcription.recordingSid,
        })
      }
    })
    lookupFinished()
  })
})

// Transcribes one Twilio recording with the Google Cloud Speech client.
// Query parameters:
//   sid - SID of the recording to transcribe (required)
// Responds with the transcript segments joined by single spaces, with HTTP 400 when
// `sid` is missing or malformed, and with HTTP 500 when the speech client reports an error.
app.get('/transcribe', (req, res) => {
  const speech_options = {
    encoding: 'LINEAR16',
    sampleRateHertz: 8000,
    languageCode: 'en-US',
    verbose: true,
  }
  const sid = req.query.sid
  // `sid` comes straight from the query string and is spliced into a URL below,
  // so only plain alphanumeric identifiers are accepted
  if (typeof sid !== 'string' || !/^[A-Za-z0-9]+$/.test(sid)) {
    res.status(400).send('A valid recording sid is required')
    return
  }
  const directURL = baseURL + sid + '.wav'
  const transcribeCallback = (err, transcript) => {
    if (err) {
      console.log(err)
      // always answer the request; previously the error path never responded
      res.status(500).send('Transcription failed')
      return
    }
    const transcriptText = transcript.map((item) => item.transcript).join(" ")
    res.send(transcriptText)
  }
  speechClient.recognize(directURL, speech_options, transcribeCallback)
}) //the closing parenthesis of app.get( was missing, which made the file unparseable

// Serve the static front-end app from the public_html directory (your application might be different)
app.use(express.static('public_html'))

// Start the server and listen for incoming requests; replace [YOUR_PORT] with your port
app.listen([YOUR_PORT], () => {
  console.log('listening')
})

## twilio-transcription-front.js
// Fetches the recording and transcription lists from /list, copies recordingURL onto each
// transcription whose recording is present, stores the transcriptions in the global
// `playlist` (and its length in `len`), then calls loadAudio().
loadFiles = function () {
  const listRequest = $.ajax({
    method: "GET",
    url: "/list"
  })

  listRequest.done(function (data) {
    //pair each transcription with its recording by recordingSid (this should probably be done on the backend, really)
    data.transcriptions.forEach(function (entry) {
      const match = data.recordings.find(function (recording) {
        return recording.recordingSid === entry.recordingSid
      })
      if (match !== undefined) {
        entry.recordingURL = match.recordingURL
      }
    })
    playlist = data.transcriptions
    len = playlist.length
    loadAudio()
  })
}

//I am loading and transcribing the audio via google cloud on the fly from the front end. In reality, you'd be doing this on the backend and having your server store those transcriptions somewhere so they're not being transcribed every time the app is accessed by a front end user

// Requests a transcription for the current playlist entry and (re)loads the audio element.
// Reads the globals `playlist`, `current` and `$audio`; does nothing when there is no
// entry at `playlist[current]`.
loadAudio = function () {
  const entry = playlist[current]
  // Nothing to load when the playlist is empty or `current` is out of range
  if (!entry) {
    return
  }
  const curSID = entry.recordingSid
  const audioSource = '/recordings/' + curSID
  // transcribeAudio expects (audioSource, curSID); passing only the SID sent it as
  // `source` and left the `sid` parameter undefined
  transcribeAudio(audioSource, curSID)
  // NOTE(review): audioSource is not assigned to the audio element here - presumably its src is set elsewhere; confirm
  $audio[0].addEventListener('canplaythrough', playAudio)
  $audio[0].load()
}

// Asks the /transcribe route for a transcription and logs whatever it returns.
//   audioSource - sent as the `source` query parameter
//   curSID      - sent as the `sid` query parameter
transcribeAudio = function (audioSource, curSID) {
  const query = {
    source: audioSource,
    sid: curSID
  }
  const logTranscript = function (transcript) {
    console.log(transcript) //log the google speech api transcription
  }
  $.get('/transcribe', query).done(logTranscript)
}

// Logs the Twilio transcription text of the current playlist entry, if it has one.
playAudio = function () {
  const entry = playlist[current]
  // skip when there is no current entry or it carries no transcription text
  if (!entry || entry.transcriptionText == null) {
    return
  }
  console.log(entry.transcriptionText) //log the twilio speech api transcription
}
	//node vars
	const express = require('express')
	const twilio = require('twilio')
	const request = require('request')

	// ---- Twilio configuration ----
	// Replace the '###' placeholders with the values from your Twilio console.
	const accountSid = '###' //your twilio account SID
	const authToken = "###" //your twilio auth token
	// REST client built from the credentials above; the /list route uses it
	const client = twilio(accountSid, authToken)
	// Prefix for recording media URLs; the /transcribe route appends "<recording sid>.wav"
	const baseURL = 'https://api.twilio.com/2010-04-01/Accounts/[YOUR_ACCOUNT_HERE]/Recordings/'

	// ---- Google Cloud Speech configuration ----
	const Speech = require('@google-cloud/speech')
	const projectId = 'twilio-translated'
	const speechClient = Speech({ projectId })
	// Recognition settings passed along with the audio to the speech client
	const speech_options = {
	  encoding: 'LINEAR16',
	  sampleRateHertz: 8000,
	  languageCode: 'en-US',
	  verbose: true,
	}

	// Express application that all routes below are registered on
	let app = express()

	// Voice webhook: greets the caller, records a message, then hangs up.
	// In the Twilio console, configure incoming calls to use this route (e.g. https://yourserver.com/record).
	app.post('/record', (req, res) => {
	  // <Record> settings. Twilio's own transcription is left on so its output can be
	  // compared with Google's; in your app it can be omitted or set to false.
	  const recordOptions = {
	    transcribe: true,
	    maxLength: 30, //seconds to record
	  }

	  // Build the TwiML document: spoken prompt, then <Record>, then <Hangup>
	  const voiceResponse = new twilio.TwimlResponse()
	  voiceResponse.say('Hello. Please leave your message. Press any button to end recording.')
	  voiceResponse.record(recordOptions)
	  voiceResponse.hangup()

	  // Reply to the webhook request with the TwiML serialized as XML
	  const xml = voiceResponse.toString()
	  res.type('text/xml')
	  res.send(xml)
	})

	// Returns the Twilio recordings and the non-failed Twilio transcriptions as one payload:
	//   { recordings: [{ recordingSid }], transcriptions: [{ transcriptionText, recordingSid }] }
	// Recordings and transcriptions come from two different Twilio API endpoints, so both
	// lookups are started here and the reply is sent once both have completed.
	// Responds with HTTP 500 if either lookup reports an error.
	app.get('/list', (request, response) => {
	  let hasSentData = false //guards against replying more than once
	  let pendingLookups = 2 //recordings + transcriptions

	  const responseData = {
	    recordings: [],
	    transcriptions: [],
	  }

	  // Called once by each lookup when it is done. Completion is tracked with a counter
	  // instead of checking whether the other array is non-empty, so an empty list on
	  // either side no longer leaves the request without a response.
	  const lookupFinished = (err) => {
	    if (hasSentData) {
	      return
	    }
	    if (err) {
	      hasSentData = true
	      console.log(err)
	      response.status(500).send('Unable to load recordings from Twilio')
	      return
	    }
	    pendingLookups -= 1
	    if (pendingLookups === 0) {
	      hasSentData = true
	      response.send(responseData)
	    }
	  }

	  client.recordings.list(function (err, data) {
	    if (err) {
	      lookupFinished(err)
	      return
	    }
	    data.forEach(function (recording) {
	      responseData.recordings.push({
	        recordingSid: recording.sid,
	      })
	    })
	    lookupFinished()
	  })

	  client.transcriptions.list(function (err, data) {
	    if (err) {
	      lookupFinished(err)
	      return
	    }
	    data.forEach(function (transcription) {
	      if (transcription.status !== 'failed') { //only return recordings with an actual twilio transcription
	        responseData.transcriptions.push({
	          transcriptionText: transcription.transcriptionText,
	          recordingSid: transcription.recordingSid,
	        })
	      }
	    })
	    lookupFinished()
	  })
	})

	// Transcribes one Twilio recording with the Google Cloud Speech client.
	// Query parameters:
	//   sid - SID of the recording to transcribe (required)
	// Responds with the transcript segments joined by single spaces, with HTTP 400 when
	// `sid` is missing or malformed, and with HTTP 500 when the speech client reports an error.
	app.get('/transcribe', (req, res) => {
	  const speech_options = {
	    encoding: 'LINEAR16',
	    sampleRateHertz: 8000,
	    languageCode: 'en-US',
	    verbose: true,
	  }
	  const sid = req.query.sid
	  // `sid` comes straight from the query string and is spliced into a URL below,
	  // so only plain alphanumeric identifiers are accepted
	  if (typeof sid !== 'string' || !/^[A-Za-z0-9]+$/.test(sid)) {
	    res.status(400).send('A valid recording sid is required')
	    return
	  }
	  const directURL = baseURL + sid + '.wav'
	  const transcribeCallback = (err, transcript) => {
	    if (err) {
	      console.log(err)
	      // always answer the request; previously the error path never responded
	      res.status(500).send('Transcription failed')
	      return
	    }
	    const transcriptText = transcript.map((item) => item.transcript).join(" ")
	    res.send(transcriptText)
	  }
	  speechClient.recognize(directURL, speech_options, transcribeCallback)
	}) //the closing parenthesis of app.get( was missing, which made the file unparseable

	// Serve the static front-end app from the public_html directory (your application might be different)
	app.use(express.static('public_html'))

	// Start the server and listen for incoming requests; replace [YOUR_PORT] with your port
	app.listen([YOUR_PORT], () => {
	  console.log('listening')
	})
	// Fetches the recording and transcription lists from /list, copies recordingURL onto each
	// transcription whose recording is present, stores the transcriptions in the global
	// `playlist` (and its length in `len`), then calls loadAudio().
	loadFiles = function () {
	  const listRequest = $.ajax({
	    method: "GET",
	    url: "/list"
	  })

	  listRequest.done(function (data) {
	    //pair each transcription with its recording by recordingSid (this should probably be done on the backend, really)
	    data.transcriptions.forEach(function (entry) {
	      const match = data.recordings.find(function (recording) {
	        return recording.recordingSid === entry.recordingSid
	      })
	      if (match !== undefined) {
	        entry.recordingURL = match.recordingURL
	      }
	    })
	    playlist = data.transcriptions
	    len = playlist.length
	    loadAudio()
	  })
	}

	//I am loading and transcribing the audio via google cloud on the fly from the front end. In reality, you'd be doing this on the backend and having your server store those transcriptions somewhere so they're not being transcribed every time the app is accessed by a front end user

	// Requests a transcription for the current playlist entry and (re)loads the audio element.
	// Reads the globals `playlist`, `current` and `$audio`; does nothing when there is no
	// entry at `playlist[current]`.
	loadAudio = function () {
	  const entry = playlist[current]
	  // Nothing to load when the playlist is empty or `current` is out of range
	  if (!entry) {
	    return
	  }
	  const curSID = entry.recordingSid
	  const audioSource = '/recordings/' + curSID
	  // transcribeAudio expects (audioSource, curSID); passing only the SID sent it as
	  // `source` and left the `sid` parameter undefined
	  transcribeAudio(audioSource, curSID)
	  // NOTE(review): audioSource is not assigned to the audio element here - presumably its src is set elsewhere; confirm
	  $audio[0].addEventListener('canplaythrough', playAudio)
	  $audio[0].load()
	}

	// Asks the /transcribe route for a transcription and logs whatever it returns.
	//   audioSource - sent as the `source` query parameter
	//   curSID      - sent as the `sid` query parameter
	transcribeAudio = function (audioSource, curSID) {
	  const query = {
	    source: audioSource,
	    sid: curSID
	  }
	  const logTranscript = function (transcript) {
	    console.log(transcript) //log the google speech api transcription
	  }
	  $.get('/transcribe', query).done(logTranscript)
	}

	// Logs the Twilio transcription text of the current playlist entry, if it has one.
	playAudio = function () {
	  const entry = playlist[current]
	  // skip when there is no current entry or it carries no transcription text
	  if (!entry || entry.transcriptionText == null) {
	    return
	  }
	  console.log(entry.transcriptionText) //log the twilio speech api transcription
	}