Skip to content

Instantly share code, notes, and snippets.

@Leko
Last active November 13, 2018 10:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Leko/937b97724def8de90b8fe97a3bfb639c to your computer and use it in GitHub Desktop.
Save Leko/937b97724def8de90b8fe97a3bfb639c to your computer and use it in GitHub Desktop.
Example of text to speech with VOICEROID(c)
.env
node_modules
const debug = require('debug')('ffmpeg')
const ffmpeg = require('fluent-ffmpeg')
// Wrap fluent-ffmpeg's `_prepare` step so that the argument list it produces
// is written to the `ffmpeg` debug channel before being passed on unchanged.
ffmpeg.prototype._prepare = (function (org) {
  /**
   * Logging replacement for `_prepare`.
   *
   * @param {Function} callback - receives `(err, args)` exactly as the
   *   wrapped implementation reports them.
   * @param {*} readMetadata - forwarded untouched to the wrapped implementation.
   */
  return function (callback, readMetadata) {
    org.call(this, (err, args) => {
      // Only log when an argument list was actually delivered; calling
      // `.join` on a missing `args` would throw and hide the original `err`.
      if (Array.isArray(args)) {
        debug(args.join(' '))
      }
      callback(err, args)
    }, readMetadata)
  }
}(ffmpeg.prototype._prepare))
module.exports = ffmpeg
const debug = require('debug')('fetch')
const fetch = require('isomorphic-fetch')
module.exports = async (url, opts) => {
debug(`${opts.method} ${url}`)
const response = await fetch(url, opts)
debug(`${response.status} ${response.statusText}`)
if (!response.ok) {
debug(response.headers)
}
return response
}
{
"name": "playbook-to-voices",
"version": "0.1.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"async": {
"version": "2.5.0",
"resolved": "https://registry.npmjs.org/async/-/async-2.5.0.tgz",
"integrity": "sha512-e+lJAJeNWuPCNyxZKOBdaJGyLGHugXVQtrAwtuAe2vhxTYxFTKE73p8JuTmdH0qdQZtDvI4dhJwjZc5zsfIsYw==",
"requires": {
"lodash": "4.17.4"
}
},
"bluebird": {
"version": "3.5.0",
"resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.0.tgz",
"integrity": "sha1-eRQg1/VR7qKJdFOop3ZT+WYG1nw="
},
"commander": {
"version": "2.11.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-2.11.0.tgz",
"integrity": "sha512-b0553uYA5YAEGgyYIGYROzKQ7X5RAqedkfjiZxwi0kL1g3bOaBNNZfYkzt/CL0umgD5wc9Jec2FbB98CjkMRvQ=="
},
"csv-parse": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-1.2.0.tgz",
"integrity": "sha1-BHtzhoq5qFdG6IX2N/ntD7ZFpCU="
},
"debug": {
"version": "2.6.8",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.8.tgz",
"integrity": "sha1-5zFTHKLt4n0YgiJCfaF4IdaP9Pw=",
"requires": {
"ms": "2.0.0"
}
},
"encoding": {
"version": "0.1.12",
"resolved": "https://registry.npmjs.org/encoding/-/encoding-0.1.12.tgz",
"integrity": "sha1-U4tm8+5izRq1HsMjgp0flIDHS+s=",
"requires": {
"iconv-lite": "0.4.18"
}
},
"fluent-ffmpeg": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/fluent-ffmpeg/-/fluent-ffmpeg-2.1.2.tgz",
"integrity": "sha1-yVLeIkD4EuvaCqgAbXd27irPfXQ=",
"requires": {
"async": "2.5.0",
"which": "1.2.14"
}
},
"iconv-lite": {
"version": "0.4.18",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.18.tgz",
"integrity": "sha512-sr1ZQph3UwHTR0XftSbK85OvBbxe/abLGzEnPENCQwmHf7sck8Oyu4ob3LgBxWWxRoM+QszeUyl7jbqapu2TqA=="
},
"is-stream": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-1.1.0.tgz",
"integrity": "sha1-EtSj3U5o4Lec6428hBc66A2RykQ="
},
"isexe": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
"integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA="
},
"isomorphic-fetch": {
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/isomorphic-fetch/-/isomorphic-fetch-2.2.1.tgz",
"integrity": "sha1-YRrhrPFPXoH3KVB0coGf6XM1WKk=",
"requires": {
"node-fetch": "1.7.1",
"whatwg-fetch": "2.0.3"
}
},
"lodash": {
"version": "4.17.4",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.4.tgz",
"integrity": "sha1-eCA6TRwyiuHYbcpkYONptX9AVa4="
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"node-fetch": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-1.7.1.tgz",
"integrity": "sha512-j8XsFGCLw79vWXkZtMSmmLaOk9z5SQ9bV/tkbZVCqvgwzrjAGq66igobLofHtF63NvMTp2WjytpsNTGKa+XRIQ==",
"requires": {
"encoding": "0.1.12",
"is-stream": "1.1.0"
}
},
"ssml-builder": {
"version": "0.2.4",
"resolved": "https://registry.npmjs.org/ssml-builder/-/ssml-builder-0.2.4.tgz",
"integrity": "sha1-dN+9OPmvU+0q0mlKWDb09mY38WM="
},
"uuid": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-3.1.0.tgz",
"integrity": "sha512-DIWtzUkw04M4k3bf1IcpS2tngXEL26YUD2M0tMDUpnUrz2hgzUBlD55a4FjdLGPvfHxS6uluGWvaVEqgBcVa+g=="
},
"whatwg-fetch": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-2.0.3.tgz",
"integrity": "sha1-nITsLc9oGH/wC8ZOEnS0QhduHIQ="
},
"which": {
"version": "1.2.14",
"resolved": "https://registry.npmjs.org/which/-/which-1.2.14.tgz",
"integrity": "sha1-mofEN48D6CfOyvGs31bHNsAcFOU=",
"requires": {
"isexe": "2.0.0"
}
}
}
}
{
"name": "playbook-to-voices",
"version": "0.1.0",
"description": "Convert playbook to voices with VOICELOAD(c)",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [
"text2speech",
"text_to_speech",
"speechsynthesize"
],
"author": "Leko <leko.noor@gmail.com>",
"license": "MIT",
"dependencies": {
"bluebird": "^3.5.0",
"commander": "^2.11.0",
"csv-parse": "^1.2.0",
"debug": "^2.6.8",
"fluent-ffmpeg": "^2.1.2",
"isomorphic-fetch": "^2.2.1",
"ssml-builder": "^0.2.4",
"uuid": "^3.1.0"
}
}
#!/usr/bin/env node
const fs = require('fs')
const os = require('os')
const path = require('path')
const querystring = require('querystring')
const commander = require('commander')
const Promise = require('bluebird')
const uuid = require('uuid/v4')
const parse = Promise.promisify(require('csv-parse'))
const debug = require('debug')
const SSML = require('ssml-builder')
const ffmpeg = require('./debuggable-fluent-ffmpeg')
const fetch = require('./debuggable-isomorphic-fetch')
const pkg = require('./package.json')
const unlink = Promise.promisify(fs.unlink)
const readFile = Promise.promisify(fs.readFile)
const writeFile = Promise.promisify(fs.writeFile)
// Preset name used whenever a row leaves its preset/name column empty.
const PRESET_NAME_DEFAULT = 'default'
// Baseline prosody values: used as the base every parsed preset is merged
// onto, and as the fallback when no preset matches a voice.
const PRESET_DEFAULT = { rate: 1, pitch: 1, range: 1, volume: 1 }
/**
 * Append a `<voice>` element wrapping `text` to the builder's element list.
 * `text` is inserted as-is, so it may itself contain markup.
 *
 * @param {string} name - value of the element's `name` attribute.
 * @param {string} text - content placed inside the element.
 * @returns {SSML} the builder itself, for chaining.
 */
SSML.prototype.voice = function (name, text) {
  const openingTag = `<voice name="${name}">`
  const closingTag = '</voice>'
  this._elements.push(openingTag + text + closingTag)
  return this
}
// Decorate the builder's `ssml()` renderer: prepend an XML declaration, put a
// version attribute on the first bare `<speak>` tag, and drop the single
// spaces that sit between adjacent tags.
SSML.prototype.ssml = (function (renderOriginal) {
  return function () {
    const declaration = '<?xml version="1.0" encoding="utf-8" ?>\n'
    const rendered = renderOriginal.call(this)
    const versioned = rendered.replace('<speak>', '<speak version="1.1">')
    const compacted = versioned.replace(/> </g, '><')
    return declaration + compacted
  }
})(SSML.prototype.ssml)
/**
 * Read a file as UTF-8 and parse it as CSV.
 *
 * @param {string} path - location of the CSV file.
 * @returns {Promise<Array>} whatever the CSV parser yields for the contents.
 */
const parseCSV = async (path) => {
  const log = debug('playbook:csv')
  log(path)
  const contents = await readFile(path, 'utf-8')
  return parse(contents)
}
/**
 * Load a playbook CSV and return every row except the first (header) one.
 *
 * @param {string} path - location of the playbook CSV.
 * @returns {Promise<Array>} the rows after the header.
 */
const parsePlaybook = async (path) => {
  const [, ...lines] = await parseCSV(path)
  return lines
}
/**
 * Load the presets CSV and index it as `{ [voice]: { [presetName]: preset } }`.
 *
 * The first row is treated as a header and skipped. Each remaining row is
 * read as `voice, name, rate, pitch, range, volume`; an empty `name` is stored
 * under PRESET_NAME_DEFAULT. Each preset is PRESET_DEFAULT overlaid with the
 * row's values.
 *
 * @param {string} presetPath - location of the presets CSV.
 * @returns {Promise<object>} the nested preset lookup.
 */
const parsePresets = async (presetPath) => {
  const presetRows = await parseCSV(presetPath)
  return presetRows.slice(1).reduce((acc, row) => {
    const [ voice, name, rate, pitch, range, volume ] = row
    const presetName = name || PRESET_NAME_DEFAULT
    const values = { rate, pitch, range, volume }
    debug('playbook:preset')(`${voice}(${presetName}): ${JSON.stringify(values)}`)
    // Merge into the presets already collected for this voice. Replacing the
    // whole per-voice object would discard earlier named presets whenever a
    // voice appears on more than one row.
    acc[voice] = Object.assign({}, acc[voice], {
      [presetName]: Object.assign({}, PRESET_DEFAULT, values)
    })
    return acc
  }, {})
}
// Character name as written in the playbook -> voice name sent in the SSML.
const VOICE_API_NAMES = {
  月読アイ: 'anzu',
  弦巻マキ: 'maki',
  結月ゆかり: 'sumire',
}

// Own-property test, so keys inherited from Object.prototype never match.
const hasOwnKey = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key)

/**
 * Append one spoken line to the SSML document, wrapped in a `<prosody>`
 * element built from the selected preset.
 *
 * @param {SSML} ssml - builder to append to.
 * @param {string} voice - character name; must be a key of VOICE_API_NAMES.
 * @param {string} presetName - preset to apply; empty selects PRESET_NAME_DEFAULT.
 * @param {string} txt - line content, inserted as-is (may contain markup).
 * @param {object} presets - lookup of `{ [voice]: { [presetName]: preset } }`.
 * @returns {SSML} the same builder.
 * @throws {Error} when the voice is unknown, or when every value of the
 *   selected preset is empty.
 */
const appendVoice = (ssml, voice, presetName, txt, presets) => {
  // Validate first and against own keys only: a plain `table[voice]` lookup
  // would accept names such as "constructor" or "toString".
  if (!hasOwnKey(VOICE_API_NAMES, voice)) {
    throw new Error(`Unknown voice: ${voice}`)
  }
  const resolvedName = presetName || PRESET_NAME_DEFAULT
  const voicePresets = presets[voice]
  const preset = voicePresets && hasOwnKey(voicePresets, resolvedName) && voicePresets[resolvedName]
    ? voicePresets[resolvedName]
    : PRESET_DEFAULT
  // Empty values mean "do not emit this attribute".
  const filteredPreset = Object.entries(preset).filter(([name, val]) => !!val)
  if (filteredPreset.length <= 0) {
    throw new Error(`Empty preset: ${voice}, ${resolvedName}`)
  }
  const attributes = filteredPreset
    .map(([name, val]) => `${name}="${val}"`)
    .join(' ')
  debug('playbook:voice')(`${voice}(${attributes})「${txt}」`)
  ssml.voice(VOICE_API_NAMES[voice], `<prosody ${attributes}>${txt}</prosody>`)
  return ssml
}
/**
 * Resolve the preset lookup for an optional presets file.
 *
 * @param {string} [presetPath] - presets CSV location; falsy means "no presets".
 * @returns {Promise<object>} the parsed presets, or an empty object when no
 *   path was given.
 */
const generatePresets = async (presetPath) => {
  if (!presetPath) {
    return Promise.resolve({})
  }
  return parsePresets(presetPath)
}
/**
 * Build a converter from playbook rows to an SSML string.
 *
 * @param {Promise<object>|object} genPresets - the preset lookup, or a promise for it.
 * @returns {function(Array): Promise<string>} async function taking rows of
 *   `[voice, presetName, text]` and resolving to the rendered SSML.
 */
const toSSML = (genPresets) => async (voices) => {
  const presets = await genPresets
  let doc = new SSML()
  for (const [voice, presetName, text] of voices) {
    doc = appendVoice(doc, voice, presetName, text, presets)
  }
  return doc.ssml()
}
/**
 * POST an SSML document to the text-to-speech endpoint.
 *
 * The API key is read from the DOCOMO_API_KEY environment variable and sent
 * as the `APIKEY` query parameter.
 *
 * @param {string} ssml - SSML document used as the request body.
 * @returns {Promise<object>} the fetch response (not checked for success here).
 * @throws {Error} (as a rejection) when DOCOMO_API_KEY is unset or empty.
 */
const textToSpeech = async (ssml) => {
  const apiKey = process.env.DOCOMO_API_KEY
  // Fail fast with an actionable message instead of sending `APIKEY=`.
  if (!apiKey) {
    throw new Error('DOCOMO_API_KEY environment variable is not set')
  }
  const ENDPOINT = 'https://api.apigw.smt.docomo.ne.jp/aiTalk/v1/textToSpeech'
  const query = querystring.stringify({
    APIKEY: apiKey,
  })
  return fetch(`${ENDPOINT}?${query}`, {
    method: 'POST',
    body: ssml,
    headers: {
      'Content-Type': 'application/ssml+xml',
      'Accept': 'audio/L16',
    }
  })
}
/**
 * Save the body of a successful response to a uniquely named temporary file.
 *
 * @param {object} response - fetch response; its `buffer()` is written on
 *   success and its `text()` is used for the error message on failure.
 * @returns {Promise<string>} path of the temporary file that was written.
 * @throws {Error} (as a rejection) when the response is not OK; the message
 *   carries the status line and the response body.
 */
const storeTemporary = async (response) => {
  if (!response.ok) {
    const text = await response.text()
    // Reject with a real Error (not a bare string) so a stack trace survives.
    throw new Error(`Text-to-speech request failed (${response.status} ${response.statusText}): ${text}`)
  }
  // Use the platform's temp directory instead of a hard-coded /tmp.
  const tmpPath = path.join(os.tmpdir(), uuid())
  debug('playbook:storeTemporary')(tmpPath)
  const buff = await response.buffer()
  await writeFile(tmpPath, buff)
  return tmpPath
}
/**
 * Build a converter that turns a temporary raw-audio file into `destPath`
 * using ffmpeg, then deletes the temporary file.
 *
 * The input is declared to ffmpeg as format `s16be` with options `-ac 1` and
 * `-ar 16000`. On success the destination path is printed to stdout.
 *
 * @param {string} destPath - output file path handed to ffmpeg.
 * @returns {function(string): Promise<void>} async function taking the
 *   temporary input path; rejects when ffmpeg fails or the cleanup fails.
 */
const toWav = (destPath) => async (pcmPath) => {
  return new Promise((resolve, reject) => {
    const cmd = ffmpeg()
      .input(pcmPath)
      .inputOptions(['-ac 1', '-ar 16000'])
      .inputFormat('s16be')
      .output(destPath)
      .on('end', () => {
        console.log(destPath)
        // Propagate unlink failures; otherwise this promise would never
        // settle and the rejection would go unhandled.
        unlink(pcmPath).then(() => resolve(), reject)
      })
      .on('error', (err) => {
        // Best-effort cleanup of the temporary input. The conversion error is
        // the one worth reporting, so a cleanup failure is ignored on purpose.
        unlink(pcmPath).then(() => reject(err), () => reject(err))
      })
    cmd.run()
  })
}
// CLI entry point: <playbook> is the CSV to convert. The pipeline parses it,
// builds SSML, requests speech synthesis, stores the audio in a temporary
// file and converts that file to the output path.
commander
  .version(pkg.version)
  .arguments('<playbook>')
  .option('-p, --presets [path]', 'Define prosody presets')
  .option('-o, --output [path]', 'Set output path')
  .action((playbook) => {
    if (!playbook) {
      commander.outputHelp()
      return
    }
    // Default output: the playbook path with a trailing ".csv" swapped for
    // ".wav". Anchoring to the end avoids mangling paths that merely contain
    // ".csv" somewhere earlier (e.g. a directory name).
    const output = commander.output || playbook.replace(/\.csv$/i, '') + '.wav'
    parsePlaybook(playbook)
      .then(toSSML(generatePresets(commander.presets)))
      .then(textToSpeech)
      .then(storeTemporary)
      .then(toWav(output))
      .catch(e => {
        // Any failure in the pipeline is reported and ends with a non-zero exit.
        console.error(e)
        process.exit(1)
      })
  })
  .parse(process.argv)
voice preset text
弦巻マキ セヤナー グレートエレキファイア
voice name rate pitch range volume
弦巻マキ セヤナー 0.5 2.0 2.0
voice name rate pitch range volume
ゆっくり霊夢 default 1
弦巻マキ default 1.4
結月ゆかり default 1.4 1.2
月読アイ default 1.4
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 11.
voice,preset,text
結月ゆかり,,皆さんこんにちは、結月ゆかりです
弦巻マキ,,"<phoneme ph=""ツル’/マ’キ"">弦巻</phoneme>マキです"
月読アイ,,ゆっくり霊夢です
結月ゆかり,,突然ですけど私、スーパーハカーになりました!
月読アイ,,この人いきなり何言ってんだ…
弦巻マキ,,なろうと思って簡単になれるものじゃないぞ
弦巻マキ,,あとハカーじゃなくてハッカーね
結月ゆかり,,ゆかりさんの華麗なハッキング技術で
結月ゆかり,,お前たちの個人情報を丸裸にしてやる!
結月ゆかり,,"具体的には<phoneme ph=""パソコン"">PC</phoneme>の<phoneme ph=""ディー"">D</phoneme>ドライブの中身を晒してやる!"
月読アイ,,やめてください!社会的に死ぬ人が出るのでやめてください!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment