drorm/speak.service.ts

## speak.service.ts
import { Injectable } from '@angular/core';
import * as globals from '../globals';
import { NGXLogger, NgxLoggerLevel } from 'ngx-logger';
import { HttpClient } from '@angular/common/http';
import * as $ from 'jquery';
import { SettingsService } from '../utils/settings.service';

/**
 * @title Speak service,
 * Service to speak and highlight the words on the page.
 * Playnodes is the entrypoint.
 * 1. Sends the text on the page to the server to get parsed by AWS Polly
 * 2. Gets a list of sentences and speaks one sentence at a time
 * 3. While the sentence is played the words are highlighted
 */

declare const Hilitor: any; // from vendor/hilitor.js
const READ_RATE = 1.0;
const SENTENCE_DELAY = 1000; // delay time at the end of a sentence before starting a new one

@Injectable({
  providedIn: 'root'
})

export class SpeakService {
  isReading: boolean; // Are we currently in the middle of reading a sentence
  stopRequest: boolean; // Request stop of reading when the page is turned, for instance
  hilitor: any; // object to hilight the word in the page
  audio: any = new Audio(); // browser audio object
  userSetting: any; // user settings
  start: number; // start time of reading
  language: string; // language of the book
  inElement; // Is the text in an element, or in an iframe typically a book?
  VOICE_SERVER = globals.vars.server.snd;
  copyrightRegEx = globals.copyrightRegEx;
  bookPosition: number; // Position in the book we're reading

  constructor(
    private http: HttpClient,
    private logger: NGXLogger,
    private settingsService: SettingsService,
  ) {
    this.userSetting = settingsService.userSettings;
  }

  checkCopyrights(page): boolean {
    let isCopyright = false;

    this.getJson(page);
    const text = page.sentences;
    text.map(el => {
      if (el.value.search(this.copyrightRegEx) > 0) {
        isCopyright = true;
      }
    });
    return isCopyright;
  }

  /**
   * Play the text nodes on a page.
   */

  async playNodes(nodes, language, inElement, progress?) {
    this.logger.debug('playNodes', language, nodes);
    this.language = language;
    this.inElement = inElement;
    this.bookPosition = progress;
    // Get the JSON with the sentences and the words in the sentence
    const section = await this.getJson(nodes);
    if (section && !this.checkCopyrights(section)) {
      this.isReading = true;
      // play the section
      await this.playSection(section);
    }
    this.isReading = false;
  }

  /*
   * Get a JSON object with the description of the different sentences in the nodes for this page.
   * Example of a response with a single sentence, but can have multiple sentences.
   * Amazon Polly does the heavy NLP work of breaking down the sentence to its part
   * This example has one sentence, but we can get multiple sentences on a page.
   *   {
   *    "sentences": [
   *        {
   *            "end": 42,
   *            "start": 27,
   *            "time": 0,
   *            "type": "sentence",
   *            "value": "A Beautiful Day",
   *            "words": [
   *                {
   *                    "end": 28,
   *                    "start": 27,
   *                    "time": 6,
   *                    "type": "word",
   *                    "value": "A"
   *                },
   *                {
   *                    "end": 38,
   *                    "start": 29,
   *                    "time": 96,
   *                    "type": "word",
   *                    "value": "Beautiful"
   *                },
   *                {
   *                    "end": 42,
   *                    "start": 39,
   *                    "time": 754,
   *                    "type": "word",
   *                    "value": "Day"
   *                }
   *            ]
   *        }
   *    ]
   *  }
   */

  async getJson(node) {
    let text = $(node).text();
    if (text && text !== '') { // ignore empty strings
      // remove new lines from the text
      text = text.replace(/^[\s\r\n]+$/, '');
      this.logger.debug('text', text);
      // Use GET when we can since is cached but POST is not
      if (text.length < 2000) {
        // https://stackoverflow.com/questions/2659952/maximum-length-of-http-get-request
        const query = `text=${text}&lang=${this.language}`;
        const textinfo = await this.http.get(`${this.VOICE_SERVER}/textInfo?${query}`).toPromise();
        return(textinfo);
      } else {
      // Longer stringsrequire use of POST
      const textinfo = await this.http.post(`${this.VOICE_SERVER}/textInfo`, {
        text: text,
        lang: this.language
      }
      ).toPromise();
      return(textinfo);
    }
    }
    return(null);
  }

  /**
   * Play a section, typically a page from a book
   * Each page is composed of multiple sentences that we play one at the time
   */
  async playSection(textInfo) {
    this.logger.debug('section', textInfo);
    for (let ii = 0; ii < textInfo.sentences.length; ii++) {
      if (this.stopRequest) { // request to stop reading
        this.logger.debug('stop request');
        this.removeHilight();
        return;
      }

      const sentence = textInfo.sentences[ii];
      // Initalize to play the sentence
      this.hilitor = new Hilitor(this.userSetting.hilite, this.inElement);
      this.audio.src = `${this.VOICE_SERVER}/play?text=${sentence.value}&lang=${this.language}`;
      this.audio.load();
      this.audio.playbackRate = this.userSetting.speed;
      this.audio.play();
      this.start = new Date().getTime();
      // Play the sentence
      this.logger.debug(`play ${ii} of ${textInfo.sentences.length} `, sentence);
      await this.playSentence(sentence, 0, this.bookPosition);
      this.removeHilight();
    }
  }

  /**
   * Simply play/speak text without highlighting it
   * @param {String} text - The text we're speaking
   * @param {String} lang - the language
   */
  async playText(text, lang) {
      this.logger.debug('init playText ', text, lang);
      this.audio.pause(); // Stop anything else that we're speaking
      this.audio.src = `${this.VOICE_SERVER}/play?text=${text}&lang=${this.language}`;
      this.audio.load();
      this.audio.playbackRate = this.userSetting.speed;
      this.audio.play();
  }

  /**
   * Play a single sentence
   * This is called again and again and syncs the highlighting
   * with the reading of the sentence.
   * @param {sentence} object - See getJson for the structure
   * @param {position} number - the position of the current word
   * @param {bookPosition} number - the position of the page in the book
   */
  async playSentence(sentence, position, bookPosition) {
    if (!this.hilitor || !this.hilitor.apply ||   // can happen when the books is closed
      (bookPosition !== this.bookPosition)) { // can happen when page is turned multiple times in a row
      return;
    }

    let wordPosition = position;

    const currTime = this.audio.currentTime * 1000;
    const currWord = sentence.words[wordPosition];
    const wordText = currWord.value;
    this.logger.debug(this.bookPosition, ':', wordPosition, ':', wordText, ':', currWord.time, ':', currTime, ':', new Date().getTime());

    if ((currWord.time - sentence.time) <= currTime) { // new word
      // tslint:disable-next-line
      this.logger.debug(`actual bookPosition: ${this.bookPosition}, passed bookPosition: ${bookPosition}  wordPosition:${wordPosition} word:${wordText} startTime:${currWord.time},  currTime:${currTime} actualTime: ${new Date().getTime()}`);
      if (wordPosition > 0) {
        this.removeHilight();
      }

      this.logger.debug('hilitor', this.hilitor);
      this.hilitor.apply(wordText);
      this.logger.debug(wordPosition, ':', wordText, ':', currWord.time, ':', currTime, ':',
        new Date().getTime() -  this.start);
      wordPosition++;
    }

    // asked to stop, just return
    if (this.audio.ended || this.stopRequest) {
      this.removeHilight();
      return;
    }

    if (wordPosition < sentence.words.length) {
      await this.delay(50);
      await this.playSentence(sentence, wordPosition, bookPosition); // look for the next one
    } else {
      // last word
      await this.delay(SENTENCE_DELAY / this.userSetting.speed); // adjust the delay to the reading speed
      this.removeHilight();
    }
  }

  // pause the reading
  async pause() {
    if (this.isReading) {
      this.stopRequest = true; // request to stop the current reading
      this.audio.pause();
      this.audio.currentTime = 0;
      await this.delay(500);
    }
    this.stopRequest = false; // ready to read the next section
  }

  // Basically do a sleep.
  delay(ms) {
    return new Promise((resolve) => {
      return setTimeout(resolve, ms);
    });
  }

  /**
   * Remove highlighting of a word
   */

  removeHilight() {
    if (this.hilitor) {
      this.hilitor.remove();
    }
  }

}

## tts.ts
const Conf = require('./config');
const fs = require('fs');
const aws = require('aws-sdk');
const slug = require('slug');
const path = require('path');

const conf = new Conf();

const log = conf.logger;
const db = conf.pgConnect;

const polly = new aws.Polly({
  signatureVersion: 'v4',
  region: 'us-east-1',
});

const clientPath = path.join(__dirname, './snd');

/**
* Do the actual text to speech conversion using Amazon Polly
* Has two modes:
* Generate the JSON that describes the text
* Generate the MP3 of the actual sound
*/

class Tts {
  /**
   * Create a Tts.
   * @param {object} params - The params passed to polly
   * @param {String} text - The text to convert
   */

  constructor(params, text, readRate) {
    this.text = text;
    this.params = params;
    this.readRate = readRate;
    this.slug = slug(text).substring(0, 128); // used for file names limit length
    this.fileName = `${this.slug}.${params.OutputFormat}`;
    this.fileName = path.join(clientPath, this.fileName);
    log.info(params);
  }

  /**
 * Set the file name of the MP3 or json file
 * The file name is the path + the slug + id + extension
 * File name can only be 256 chars so we truncate the file name
 * and add the id for unuqenesse.
 * * @param {number} id - The id in the db
 */
  setFileName(id) {
    this.fileName = `${this.slug}-id${id}.${this.params.OutputFormat}`;
    this.fileName = path.join(clientPath, this.fileName);
    // console.log('filename:', this.fileName);
  }

  getStat() {
    try {
      const stat = fs.statSync(this.fileName);
      return (stat);
    } catch (err) {
      return (false);
    }
  }

  async convertToJson() {
    const lines = fs.readFileSync(this.fileName).toString().split('\n');
    const sentences = [];
    let currSentence;
    for (let ii = 0; ii < lines.length; ii++) {
      const line = lines[ii];
      if (line !== '') {
        const obj = JSON.parse(line);
        if (obj.type === 'sentence') {
          currSentence = obj;
          currSentence.words = []; // a sentence object has an array of words
          sentences.push(currSentence);
        } else {
          currSentence.words.push(obj);
        }
      }
    }
    const json = {
      sentences,
    };
    log.info(json);
    fs.writeFileSync(this.fileName, JSON.stringify(json), 'utf8');
  }

  async runPolly() {
    return new Promise(async (resolve, reject) => {
      const result = await db('speechinfo').select('speechinfo_id').where({
        txt: this.text,
        format: this.params.OutputFormat,
      });
      if (result[0]) { // It's in the db
        this.setFileName(result[0].speechinfo_id);
        console.log('speechinfo_id', result[0].speechinfo_id);
        resolve();
      } else {
        console.log('NO speechinfo_id', result);
      }
      // simple caching, if we fetched it earlier, just reuse it
      if (this.getStat()) {
        log.info('File exists, not doing anything');
        resolve();
      } else {
        polly.synthesizeSpeech(this.params, async (err, data) => {
          if (err) {
            log.error(err);
            reject(err);
          } else if (data) {
            const insertInfo = {
              txt: this.text,
              slug: this.slug,
              format: this.params.OutputFormat,
              read_rate: this.readRate,
              voiceid: this.params.VoiceId,
            };
            log.info(insertInfo);
            const insertId = await db('speechinfo').insert(insertInfo).returning('speechinfo_id');
            log.info('insertid', insertId);
            this.setFileName(insertId);
            if (data.AudioStream instanceof Buffer) {
              fs.writeFile(this.fileName, data.AudioStream, (fsErr) => {
                if (fsErr) {
                  log.error('Error write', err);
                  reject(err);
                } else {
                  if (this.params.OutputFormat === 'json') {
                    this.convertToJson();
                  }
                  log.info(`${this.fileName} was saved!`);
                  resolve();
                }
              });
            }
          }
        });
      }
    });
  }

  name() {
    return (this.fileName);
  }
}
module.exports = Tts;

/*
const text = 'I still have a dream, a dream deeply rooted in the American dream';
const defaultParams = {
  TextType: 'ssml',
  Text: `<speak><prosody rate="65%">${text}</prosody></speak>`,
  VoiceId: 'Ivy',
};


defaultParams.OutputFormat = 'mp3';
const tts = new Tts(defaultParams, text);
tts.runPolly();
*/
	import { Injectable } from '@angular/core';
	import * as globals from '../globals';
	import { NGXLogger, NgxLoggerLevel } from 'ngx-logger';
	import { HttpClient } from '@angular/common/http';
	import * as $ from 'jquery';
	import { SettingsService } from '../utils/settings.service';

	/**
	* @title Speak service,
	* Service to speak and highlight the words on the page.
	* Playnodes is the entrypoint.
	* 1. Sends the text on the page to the server to get parsed by AWS Polly
	* 2. Gets a list of sentences and speaks one sentence at a time
	* 3. While the sentence is played the words are highlighted
	*/

	declare const Hilitor: any; // from vendor/hilitor.js
	const READ_RATE = 1.0;
	const SENTENCE_DELAY = 1000; // delay time at the end of a sentence before starting a new one

	@Injectable({
	providedIn: 'root'
	})

	export class SpeakService {
	isReading: boolean; // Are we currently in the middle of reading a sentence
	stopRequest: boolean; // Request stop of reading when the page is turned, for instance
	hilitor: any; // object to hilight the word in the page
	audio: any = new Audio(); // browser audio object
	userSetting: any; // user settings
	start: number; // start time of reading
	language: string; // language of the book
	inElement; // Is the text in an element, or in an iframe typically a book?
	VOICE_SERVER = globals.vars.server.snd;
	copyrightRegEx = globals.copyrightRegEx;
	bookPosition: number; // Position in the book we're reading

	constructor(
	private http: HttpClient,
	private logger: NGXLogger,
	private settingsService: SettingsService,
	) {
	this.userSetting = settingsService.userSettings;
	}

	checkCopyrights(page): boolean {
	let isCopyright = false;

	this.getJson(page);
	const text = page.sentences;
	text.map(el => {
	if (el.value.search(this.copyrightRegEx) > 0) {
	isCopyright = true;
	}
	});
	return isCopyright;
	}

	/**
	* Play the text nodes on a page.
	*/

	async playNodes(nodes, language, inElement, progress?) {
	this.logger.debug('playNodes', language, nodes);
	this.language = language;
	this.inElement = inElement;
	this.bookPosition = progress;
	// Get the JSON with the sentences and the words in the sentence
	const section = await this.getJson(nodes);
	if (section && !this.checkCopyrights(section)) {
	this.isReading = true;
	// play the section
	await this.playSection(section);
	}
	this.isReading = false;
	}

	/*
	* Get a JSON object with the description of the different sentences in the nodes for this page.
	* Example of a response with a single sentence, but can have multiple sentences.
	* Amazon Polly does the heavy NLP work of breaking down the sentence to its part
	* This example has one sentence, but we can get multiple sentences on a page.
	* {
	* "sentences": [
	* {
	* "end": 42,
	* "start": 27,
	* "time": 0,
	* "type": "sentence",
	* "value": "A Beautiful Day",
	* "words": [
	* {
	* "end": 28,
	* "start": 27,
	* "time": 6,
	* "type": "word",
	* "value": "A"
	* },
	* {
	* "end": 38,
	* "start": 29,
	* "time": 96,
	* "type": "word",
	* "value": "Beautiful"
	* },
	* {
	* "end": 42,
	* "start": 39,
	* "time": 754,
	* "type": "word",
	* "value": "Day"
	* }
	* ]
	* }
	* ]
	* }
	*/

	async getJson(node) {
	let text = $(node).text();
	if (text && text !== '') { // ignore empty strings
	// remove new lines from the text
	text = text.replace(/^[\s\r\n]+$/, '');
	this.logger.debug('text', text);
	// Use GET when we can since is cached but POST is not
	if (text.length < 2000) {
	// https://stackoverflow.com/questions/2659952/maximum-length-of-http-get-request
	const query = `text=${text}&lang=${this.language}`;
	const textinfo = await this.http.get(`${this.VOICE_SERVER}/textInfo?${query}`).toPromise();
	return(textinfo);
	} else {
	// Longer stringsrequire use of POST
	const textinfo = await this.http.post(`${this.VOICE_SERVER}/textInfo`, {
	text: text,
	lang: this.language
	}
	).toPromise();
	return(textinfo);
	}
	}
	return(null);
	}

	/**
	* Play a section, typically a page from a book
	* Each page is composed of multiple sentences that we play one at the time
	*/
	async playSection(textInfo) {
	this.logger.debug('section', textInfo);
	for (let ii = 0; ii < textInfo.sentences.length; ii++) {
	if (this.stopRequest) { // request to stop reading
	this.logger.debug('stop request');
	this.removeHilight();
	return;
	}

	const sentence = textInfo.sentences[ii];
	// Initalize to play the sentence
	this.hilitor = new Hilitor(this.userSetting.hilite, this.inElement);
	this.audio.src = `${this.VOICE_SERVER}/play?text=${sentence.value}&lang=${this.language}`;
	this.audio.load();
	this.audio.playbackRate = this.userSetting.speed;
	this.audio.play();
	this.start = new Date().getTime();
	// Play the sentence
	this.logger.debug(`play ${ii} of ${textInfo.sentences.length} `, sentence);
	await this.playSentence(sentence, 0, this.bookPosition);
	this.removeHilight();
	}
	}

	/**
	* Simply play/speak text without highlighting it
	* @param {String} text - The text we're speaking
	* @param {String} lang - the language
	*/
	async playText(text, lang) {
	this.logger.debug('init playText ', text, lang);
	this.audio.pause(); // Stop anything else that we're speaking
	this.audio.src = `${this.VOICE_SERVER}/play?text=${text}&lang=${this.language}`;
	this.audio.load();
	this.audio.playbackRate = this.userSetting.speed;
	this.audio.play();
	}

	/**
	* Play a single sentence
	* This is called again and again and syncs the highlighting
	* with the reading of the sentence.
	* @param {sentence} object - See getJson for the structure
	* @param {position} number - the position of the current word
	* @param {bookPosition} number - the position of the page in the book
	*/
	async playSentence(sentence, position, bookPosition) {
	if (!this.hilitor \|\| !this.hilitor.apply \|\| // can happen when the books is closed
	(bookPosition !== this.bookPosition)) { // can happen when page is turned multiple times in a row
	return;
	}

	let wordPosition = position;

	const currTime = this.audio.currentTime * 1000;
	const currWord = sentence.words[wordPosition];
	const wordText = currWord.value;
	this.logger.debug(this.bookPosition, ':', wordPosition, ':', wordText, ':', currWord.time, ':', currTime, ':', new Date().getTime());

	if ((currWord.time - sentence.time) <= currTime) { // new word
	// tslint:disable-next-line
	this.logger.debug(`actual bookPosition: ${this.bookPosition}, passed bookPosition: ${bookPosition} wordPosition:${wordPosition} word:${wordText} startTime:${currWord.time}, currTime:${currTime} actualTime: ${new Date().getTime()}`);
	if (wordPosition > 0) {
	this.removeHilight();
	}

	this.logger.debug('hilitor', this.hilitor);
	this.hilitor.apply(wordText);
	this.logger.debug(wordPosition, ':', wordText, ':', currWord.time, ':', currTime, ':',
	new Date().getTime() - this.start);
	wordPosition++;
	}

	// asked to stop, just return
	if (this.audio.ended \|\| this.stopRequest) {
	this.removeHilight();
	return;
	}

	if (wordPosition < sentence.words.length) {
	await this.delay(50);
	await this.playSentence(sentence, wordPosition, bookPosition); // look for the next one
	} else {
	// last word
	await this.delay(SENTENCE_DELAY / this.userSetting.speed); // adjust the delay to the reading speed
	this.removeHilight();
	}
	}

	// pause the reading
	async pause() {
	if (this.isReading) {
	this.stopRequest = true; // request to stop the current reading
	this.audio.pause();
	this.audio.currentTime = 0;
	await this.delay(500);
	}
	this.stopRequest = false; // ready to read the next section
	}

	// Basically do a sleep.
	delay(ms) {
	return new Promise((resolve) => {
	return setTimeout(resolve, ms);
	});
	}

	/**
	* Remove highlighting of a word
	*/

	removeHilight() {
	if (this.hilitor) {
	this.hilitor.remove();
	}
	}

	}
	const Conf = require('./config');
	const fs = require('fs');
	const aws = require('aws-sdk');
	const slug = require('slug');
	const path = require('path');

	const conf = new Conf();

	const log = conf.logger;
	const db = conf.pgConnect;

	const polly = new aws.Polly({
	signatureVersion: 'v4',
	region: 'us-east-1',
	});

	const clientPath = path.join(__dirname, './snd');

	/**
	* Do the actual text to speech conversion using Amazon Polly
	* Has two modes:
	* Generate the JSON that describes the text
	* Generate the MP3 of the actual sound
	*/

	class Tts {
	/**
	* Create a Tts.
	* @param {object} params - The params passed to polly
	* @param {String} text - The text to convert
	*/

	constructor(params, text, readRate) {
	this.text = text;
	this.params = params;
	this.readRate = readRate;
	this.slug = slug(text).substring(0, 128); // used for file names limit length
	this.fileName = `${this.slug}.${params.OutputFormat}`;
	this.fileName = path.join(clientPath, this.fileName);
	log.info(params);
	}

	/**
	* Set the file name of the MP3 or json file
	* The file name is the path + the slug + id + extension
	* File name can only be 256 chars so we truncate the file name
	* and add the id for unuqenesse.
	* * @param {number} id - The id in the db
	*/
	setFileName(id) {
	this.fileName = `${this.slug}-id${id}.${this.params.OutputFormat}`;
	this.fileName = path.join(clientPath, this.fileName);
	// console.log('filename:', this.fileName);
	}

	getStat() {
	try {
	const stat = fs.statSync(this.fileName);
	return (stat);
	} catch (err) {
	return (false);
	}
	}

	async convertToJson() {
	const lines = fs.readFileSync(this.fileName).toString().split('\n');
	const sentences = [];
	let currSentence;
	for (let ii = 0; ii < lines.length; ii++) {
	const line = lines[ii];
	if (line !== '') {
	const obj = JSON.parse(line);
	if (obj.type === 'sentence') {
	currSentence = obj;
	currSentence.words = []; // a sentence object has an array of words
	sentences.push(currSentence);
	} else {
	currSentence.words.push(obj);
	}
	}
	}
	const json = {
	sentences,
	};
	log.info(json);
	fs.writeFileSync(this.fileName, JSON.stringify(json), 'utf8');
	}

	async runPolly() {
	return new Promise(async (resolve, reject) => {
	const result = await db('speechinfo').select('speechinfo_id').where({
	txt: this.text,
	format: this.params.OutputFormat,
	});
	if (result[0]) { // It's in the db
	this.setFileName(result[0].speechinfo_id);
	console.log('speechinfo_id', result[0].speechinfo_id);
	resolve();
	} else {
	console.log('NO speechinfo_id', result);
	}
	// simple caching, if we fetched it earlier, just reuse it
	if (this.getStat()) {
	log.info('File exists, not doing anything');
	resolve();
	} else {
	polly.synthesizeSpeech(this.params, async (err, data) => {
	if (err) {
	log.error(err);
	reject(err);
	} else if (data) {
	const insertInfo = {
	txt: this.text,
	slug: this.slug,
	format: this.params.OutputFormat,
	read_rate: this.readRate,
	voiceid: this.params.VoiceId,
	};
	log.info(insertInfo);
	const insertId = await db('speechinfo').insert(insertInfo).returning('speechinfo_id');
	log.info('insertid', insertId);
	this.setFileName(insertId);
	if (data.AudioStream instanceof Buffer) {
	fs.writeFile(this.fileName, data.AudioStream, (fsErr) => {
	if (fsErr) {
	log.error('Error write', err);
	reject(err);
	} else {
	if (this.params.OutputFormat === 'json') {
	this.convertToJson();
	}
	log.info(`${this.fileName} was saved!`);
	resolve();
	}
	});
	}
	}
	});
	}
	});
	}

	name() {
	return (this.fileName);
	}
	}
	module.exports = Tts;

	/*
	const text = 'I still have a dream, a dream deeply rooted in the American dream';
	const defaultParams = {
	TextType: 'ssml',
	Text: `<speak><prosody rate="65%">${text}</prosody></speak>`,
	VoiceId: 'Ivy',
	};


	defaultParams.OutputFormat = 'mp3';
	const tts = new Tts(defaultParams, text);
	tts.runPolly();
	*/