Skip to content

Instantly share code, notes, and snippets.

@krhoyt
Created October 22, 2018 15:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krhoyt/9c140342aaabde99ddf263e0a53cb98c to your computer and use it in GitHub Desktop.
Save krhoyt/9c140342aaabde99ddf263e0a53cb98c to your computer and use it in GitHub Desktop.
Upload audio file from web to Watson via Cloud Function.
/**
 * Browser-side controller for the captioning page.
 *
 * Lets the user pick an audio file, uploads it to the endpoint configured in
 * Captions.WATSON_PATH, and shows the returned words in time with playback
 * of a WaveSurfer waveform.
 */
class Captions {
  /**
   * Looks up the page elements, creates the waveform player and wires up
   * every event handler.
   */
  constructor() {
    // Timed words for the current audio file.
    // Each entry is read as [ text, start, end ] by doAudioProcess()
    this.conversation = [];

    // Visible button that stands in for the real file selector,
    // so the interaction can be styled freely
    const watson = document.querySelector( 'button.file' );
    watson.addEventListener( 'click', ( evt ) => this.doWatsonClick( evt ) );

    // The actual file selector
    this.selector = document.querySelector( 'input[type="file"]' );
    this.selector.addEventListener( 'change', ( evt ) => this.doSelectorChange( evt ) );

    // Shared text area: instructions, progress indicator and dialog
    this.copy = document.querySelector( 'p.copy' );

    // Container for the playback controls
    this.waveform = document.querySelector( '.waveform' );

    // Waveform display and audio playback
    this.audio = WaveSurfer.create( {
      container: 'div.audio',
      waveColor: Captions.WAVEFORM_WAVE,
      progressColor: Captions.WAVEFORM_PROGRESS,
      height: Captions.WAVEFORM_HEIGHT
    } );
    this.audio.on( 'audioprocess', () => this.doAudioProcess() );
    this.audio.on( 'finish', () => this.doAudioFinish() );

    // Play/pause button
    this.play = document.querySelector( 'button.play' );
    this.play.addEventListener( 'click', ( evt ) => this.doPlayClick( evt ) );
  }

  /**
   * Swaps the play button's CSS class to match the playback state.
   *
   * @param {boolean} playing - true while audio is playing.
   */
  updatePlayButton( playing ) {
    if( playing ) {
      this.play.classList.remove( 'play' );
      this.play.classList.add( 'pause' );
    } else {
      this.play.classList.remove( 'pause' );
      this.play.classList.add( 'play' );
    }
  }

  /**
   * Called when audio has finished playing.
   * Stops the player and shows the "play" state again.
   */
  doAudioFinish() {
    this.audio.stop();
    this.updatePlayButton( false );
  }

  /**
   * Called as the audio plays.
   * Shows the word whose interval contains the current playback time,
   * or a blank placeholder when no interval matches.
   */
  doAudioProcess() {
    const timing = this.audio.getCurrentTime();
    const match = this.conversation.find(
      ( [ , start, end ] ) => timing >= start && timing < end
    );

    // textContent rather than innerHTML: the text comes from a remote
    // service and must never be interpreted as markup.
    // '\u00A0' is the character the '&nbsp;' entity stands for.
    this.copy.textContent = match ? match[0] : '\u00A0';
  }

  /**
   * Called when the play button is clicked.
   * Toggles playback and updates the button to reflect the new state.
   *
   * @param {Event} evt - Click event (unused).
   */
  doPlayClick( evt ) {
    this.audio.playPause();
    this.updatePlayButton( this.audio.isPlaying() );
  }

  /**
   * Called when the file selection changes.
   * Loads the chosen file into the waveform and uploads it for captioning.
   *
   * @param {Event} evt - Change event from the file input.
   * @returns {Promise<void>} Settles once the upload has been handled;
   *   failures are reported in the page and never reject.
   */
  doSelectorChange( evt ) {
    // Capture the file now; the list on the input may change before the
    // delayed load below runs
    const file = evt.target.files[0];

    // Dismissing the dialog can fire "change" with an empty list
    if( !file ) {
      return Promise.resolve();
    }

    // Clear copy and show the progress indicator
    this.copy.textContent = '\u00A0';
    this.copy.classList.add( 'spinner' );

    // Slide the playback controls out of view
    this.waveform.style.bottom = `${0 - this.waveform.clientHeight}px`;

    // Wait for the slide animation, then load the new waveform
    setTimeout( () => {
      this.audio.loadBlob( file );
    }, 1000 );

    // Prepare form with the selected file and its media type
    const form = new FormData();
    form.append( 'file', file );
    form.append( 'type', file.type );

    // Upload to server for processing
    return fetch( `${Captions.WATSON_PATH}?${Captions.WATSON_PARAMS}`, {
      method: 'POST',
      body: form
    } )
      .then( ( response ) => {
        if( !response.ok ) {
          throw new Error( `Caption request failed with status ${response.status}` );
        }

        return response.json();
      } )
      .then( ( data ) => {
        // Flatten the per-result word timings into a single list
        const results = Array.isArray( data.results ) ? data.results : [];
        this.conversation = results.reduce(
          ( words, result ) => words.concat( result.alternatives[0].timestamps || [] ),
          []
        );

        // Remove progress indicator
        this.copy.classList.remove( 'spinner' );

        // Display the audio playback control
        this.waveform.style.bottom = `${Captions.WAVEFORM_BOTTOM}px`;

        // Debug
        console.log( data );
      } )
      .catch( ( err ) => {
        // Never leave the spinner running when the upload or parsing fails
        this.conversation = [];
        this.copy.classList.remove( 'spinner' );
        this.copy.textContent = 'Unable to caption that audio file.';
        console.error( err );
      } );
  }

  /**
   * Called when the visible file button is clicked.
   * Opens the file selection dialog.
   *
   * @param {Event} evt - Click event (unused).
   */
  doWatsonClick( evt ) {
    this.selector.click();
  }
}
// Class-level settings
// Endpoint details for the upload plus waveform appearance
Object.assign( Captions, {
  WATSON_PARAMS: 'speaker_labels=true',
  WATSON_PATH: 'https://openwhisk.ng.bluemix.net/api/v1/web/krhoyt%40us.ibm.com_dev/watson/stt.recognize',
  WAVEFORM_BOTTOM: 16,
  WAVEFORM_HEIGHT: 48,
  WAVEFORM_PROGRESS: '#016158',
  WAVEFORM_WAVE: '#00b2b2'
} );

// Entry point
// Settings above must be in place before the instance is created
const app = new Captions();
// Libraries
var fs = require( 'fs' );
var multipart = require( 'parted' ).multipart;
var request = require( 'request' );
var sts = require( 'string-to-stream' );
/**
 * Cloud Function entry point.
 *
 * Decodes the base64 multipart body in params.__ow_body, extracts the
 * uploaded audio ("file") and its media type ("type"), forwards the audio
 * to the speech-to-text service at params.API_URL and relays the answer.
 *
 * @param {Object} params - Invocation parameters. Reads __ow_body,
 *   __ow_headers, API_URL, API_USERNAME and API_PASSWORD.
 * @returns {Promise<Object>} Always resolves with a response object
 *   ( statusCode, headers, body ). Failures resolve with a JSON
 *   `{ "error": ... }` body instead of leaving the promise pending.
 */
function main( params ) {
  // Wait for response from service calls
  // Otherwise the function would shut down before receiving a response
  return new Promise( ( resolve ) => {
    let failed = false;

    // Resolve with a JSON error so the caller always gets an answer
    const fail = ( statusCode, message, err ) => {
      failed = true;
      console.log( message, err );
      resolve( {
        statusCode: statusCode,
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify( { error: message } )
      } );
    };

    const headers = params.__ow_headers || {};
    const contentType = headers['content-type'];

    if( typeof params.__ow_body !== 'string' || !contentType ) {
      fail( 400, 'Expected a multipart/form-data upload.' );
      return;
    }

    // Decode the body into a stream
    // Buffer.from replaces the deprecated Buffer constructor
    const decoded = Buffer.from( params.__ow_body, 'base64' );
    const stream = sts( decoded );

    // Disk management
    const options = {
      limit: 30 * 1024,
      diskLimit: 30 * 1024 * 1024
    };

    // Parse the parts of the body
    // Will have demarcation per HTTP specification
    const parser = new multipart( contentType, options );
    const parts = {};

    // Malformed upload: answer instead of hanging until timeout
    parser.on( 'error', function( err ) {
      fail( 400, 'Unable to parse the uploaded form.', err );
    } );

    // Put parts into key/value object
    parser.on( 'part', function( field, part ) {
      parts[field] = part;
    } );

    // Parsing is complete
    // Send data to Watson
    parser.on( 'end', function() {
      if( failed ) {
        return;
      }

      // Audio file is key named "file"
      if( !parts.file ) {
        fail( 400, 'No audio file was uploaded.' );
        return;
      }

      // Speech to Text request with speaker labels
      // Media type of the audio comes from the "type" field
      request( {
        method: 'POST',
        url: `${params.API_URL}?speaker_labels=true`,
        auth: {
          username: params.API_USERNAME,
          password: params.API_PASSWORD
        },
        headers: {
          'Content-Type': parts.type
        },
        body: fs.createReadStream( parts.file )
      }, function( err, result, body ) {
        // Best-effort removal of the upload; presumably parts.file is a
        // temporary path written by the parser (TODO confirm)
        fs.unlink( parts.file, function() {} );

        if( err ) {
          fail( 502, 'Speech to Text request failed.', err );
          return;
        }

        // Relay the service answer, including its status code
        // Response from Watson will be JSON
        resolve( {
          statusCode: result.statusCode,
          headers: {
            'Content-Type': 'application/json'
          },
          body: body
        } );
      } );
    } );

    stream.pipe( parser );
  } );
}
exports.main = main;
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Captions</title>

  <!-- Google Fonts: Montserrat (clean, readable, sans) -->
  <link href="https://fonts.googleapis.com/css?family=Montserrat" rel="stylesheet">

  <!-- Application styles -->
  <link href="/style/captions.css" rel="stylesheet">
</head>
<body>

  <!-- File interaction -->
  <!-- Stands in for the file selector below; captions.js forwards its click -->
  <!-- NOTE(review): src, width and height are not valid button attributes; -->
  <!-- presumably the logo is drawn by captions.css. Confirm before removing. -->
  <button class="file" type="button" aria-label="Select an audio file" src="/img/watson.svg" width="120" height="120"></button>

  <!-- Instructions -->
  <!-- Also progress indicator -->
  <!-- Also dialog copy -->
  <p class="copy">Click the Watson logo<br>to select an audio file.</p>

  <!-- Waveform -->
  <!-- captions.js renders the waveform inside div.audio -->
  <div class="waveform">
    <div class="speaker"></div>
    <div class="audio"></div>
    <button class="play" type="button" aria-label="Play or pause audio"></button>
  </div>

  <!-- File selector -->
  <!-- Hidden for custom user interface -->
  <input type="file" accept="audio/*">

  <!-- Libraries -->
  <!-- NOTE(review): this URL is unpinned and follows the latest release; -->
  <!-- consider pinning the version captions.js was written against. -->
  <script src="https://unpkg.com/wavesurfer.js"></script>

  <!-- Application -->
  <script src="/script/captions.js"></script>

</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment