tatsuyasusukida/!README-gcp-text-to-speech.md

## !README-gcp-text-to-speech.md

      
    Raw
  

              !README-gcp-text-to-speech.md
            
          
    🎤 How to convert text to speech using Google Text-to-Speech in Node.js [demo video available]


About this article

This article describes how to convert text to speech with Cloud Text-to-Speech from Google Cloud Platform (GCP) in Node.js. The related resources are shown below.

Demo video
Source code
日本語版 / Japanese version

Workflow

The workflow is shown below.

Coding preparation
Coding
Operation check

Coding preparation

Run the following commands to prepare for coding.
mkdir gcp-text-to-speech
cd gcp-text-to-speech
npm init -y
npm install --save @google-cloud/text-to-speech
touch input.txt main.mjs
Coding

main.mjs

Open main.mjs in your editor and enter the following content.
Click to go to main.mjs
input.txt

Open input.txt in your editor and enter the text you want to convert to speech. An example is shown below.
Click to go to input.example.txt
Operation check

Run the following command to log in with Application Default Credentials so that the client library can access the GCP API.
gcloud auth application-default login
Run the following command to convert the text to speech.
node main.mjs
Conclusion

I tried to use Google Text-to-Speech to generate the audio for the demo video of how to record a video with JavaScript. I was a little surprised to find that it was not available from the CLI (gcloud command) or the GUI (web console). There is a document that explains how to create audio from text by using the command line, but it seemed like it would take a lot of work to run the curl command and send an HTTP request to the API endpoint. Therefore, I thought it would be quicker to use the Node.js client library, so I wrote the source code. This article is based on the source code I wrote at that time.

  
## .gitignore
/node_modules/
/output/
/input.txt
/package-lock.json
# Do not ignore package-lock.json in projects other than this gist

## input.example.txt
こんにちは。
動作を確認するにはまず npm install コマンドを実行します。
次に npm run login コマンドを実行します。
最後に node main.mjs コマンドを実行します。
音声は output ディレクトリに出力されます。

## main.mjs
import fsPromises from 'fs/promises'
import path from 'path'
import {fileURLToPath} from 'url'
import textToSpeech from '@google-cloud/text-to-speech'

main()

/**
 * Converts every non-blank line of a text file to an MP3 file with
 * Google Cloud Text-to-Speech.
 *
 * The input file is the one named by the SOURCE environment variable, or
 * `input.txt` next to this script when SOURCE is unset or empty. One MP3 per
 * line is written to `output/<timestamp>/` next to this script and is named
 * `<two-digit index>.<line text>.mp3`.
 *
 * Errors are logged to stderr and reported through a non-zero exit code;
 * the returned promise itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function main () {
  try {
    // fileURLToPath decodes percent-escapes (spaces, non-ASCII characters)
    // and produces a valid path on Windows, which URL#pathname does not.
    const basedir = path.dirname(fileURLToPath(import.meta.url))

    // Read the input before creating anything so that a missing or
    // unreadable file does not leave an empty output directory behind.
    const sourceDefault = path.join(basedir, 'input.txt')
    const source = process.env.SOURCE || sourceDefault
    const texts = (await fsPromises.readFile(source, 'utf8'))
      .split(/\r?\n/) // accept CRLF files without leaking '\r' into the text
      .filter(line => !/^\s*$/.test(line))

    // The timestamp keeps the output of separate runs apart.
    const date = new Date().toISOString().replace(/[-:]/g, '')
    const dirname = path.join(basedir, 'output', date)
    await fsPromises.mkdir(dirname, {recursive: true})

    // A single client is shared by all requests.
    const client = new textToSpeech.TextToSpeechClient()

    for (const [index, text] of texts.entries()) {
      const destination = path.join(dirname, buildBasename(index, text))
      const request = {
        input: {text},
        voice: {
          languageCode: 'ja-jp',
          name: 'ja-JP-Standard-A',
          ssmlGender: 'FEMALE',
        },
        audioConfig: {
          audioEncoding: 'MP3',
        },
      }

      const [response] = await client.synthesizeSpeech(request)

      // Kept from the original as a defensive check.
      // NOTE(review): API failures presumably surface as rejections - confirm.
      if (response.error) {
        throw response.error
      }

      // The encoding argument only matters when audioContent is a string.
      const buffer = Buffer.from(response.audioContent, 'base64')
      await fsPromises.writeFile(destination, buffer)
    }
  } catch (err) {
    console.error(err)
    // Let shells and CI notice the failure instead of exiting with status 0.
    process.exitCode = 1
  }
}

/**
 * Builds the output file name `<two-digit index>.<title>.mp3` for one line.
 *
 * Slashes in the text are replaced with hyphens so that the title cannot
 * introduce path separators. The title is cut at a character boundary so
 * that the whole name stays within 255 bytes of UTF-8, the usual file name
 * limit; names that already fit are left unchanged.
 *
 * @param {number} index - Zero-based position of the line.
 * @param {string} text - The line being converted.
 * @returns {string} The file name, without a directory.
 */
function buildBasename (index, text) {
  const maxBytes = 255
  const prefix = `${String(index).padStart(2, '0')}.`
  const suffix = '.mp3'
  const budget = maxBytes - Buffer.byteLength(prefix) - Buffer.byteLength(suffix)

  let title = ''
  let used = 0

  // Iterating the string yields whole code points, so a multi-byte
  // character is never split in half.
  for (const char of text.replace(/\//g, '-')) {
    const size = Buffer.byteLength(char)

    if (used + size > budget) {
      break
    }

    title += char
    used += size
  }

  return `${prefix}${title}${suffix}`
}

## package.json
{
  "name": "gcp-text-to-speech",
  "version": "1.0.0",
  "description": "",
  "main": "main.js",
  "scripts": {
    "login": "gcloud auth application-default login",
    "start": "node main.mjs",
    "clean": "rm -rf output",
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@google-cloud/text-to-speech": "^3.4.0"
  }
}
	/node_modules/
	/output/
	/input.txt
	/package-lock.json
	# Do not ignore package-lock.json in projects other than this gist
	こんにちは。
	動作を確認するにはまず npm install コマンドを実行します。
	次に npm run login コマンドを実行します。
	最後に node main.mjs コマンドを実行します。
	音声は output ディレクトリに出力されます。
	import path from 'path'
	import fsPromises from 'fs/promises'
	import textToSpeech from '@google-cloud/text-to-speech'

	main()

	/**
	 * Converts every non-blank line of a text file to an MP3 file with
	 * Google Cloud Text-to-Speech.
	 *
	 * The input file is the one named by the SOURCE environment variable, or
	 * `input.txt` next to this script when SOURCE is unset or empty. One MP3
	 * per line is written to `output/<timestamp>/` next to this script and is
	 * named `<two-digit index>.<line text>.mp3`.
	 *
	 * Errors are logged to stderr and reported through a non-zero exit code;
	 * the returned promise itself never rejects.
	 *
	 * @returns {Promise<void>}
	 */
	async function main () {
	  try {
	    // Imported here so that this function is self-contained.
	    const {fileURLToPath} = await import('url')

	    // fileURLToPath decodes percent-escapes (spaces, non-ASCII characters)
	    // and produces a valid path on Windows, which URL#pathname does not.
	    const basedir = path.dirname(fileURLToPath(import.meta.url))

	    // Read the input before creating anything so that a missing or
	    // unreadable file does not leave an empty output directory behind.
	    const sourceDefault = path.join(basedir, 'input.txt')
	    const source = process.env.SOURCE || sourceDefault
	    const texts = (await fsPromises.readFile(source, 'utf8'))
	      .split(/\r?\n/) // accept CRLF files without leaking '\r' into the text
	      .filter(line => !/^\s*$/.test(line))

	    // The timestamp keeps the output of separate runs apart.
	    const date = new Date().toISOString().replace(/[-:]/g, '')
	    const dirname = path.join(basedir, 'output', date)
	    await fsPromises.mkdir(dirname, {recursive: true})

	    // A single client is shared by all requests.
	    const client = new textToSpeech.TextToSpeechClient()

	    for (const [index, text] of texts.entries()) {
	      const destination = path.join(dirname, buildBasename(index, text))
	      const request = {
	        input: {text},
	        voice: {
	          languageCode: 'ja-jp',
	          name: 'ja-JP-Standard-A',
	          ssmlGender: 'FEMALE',
	        },
	        audioConfig: {
	          audioEncoding: 'MP3',
	        },
	      }

	      const [response] = await client.synthesizeSpeech(request)

	      // Kept from the original as a defensive check.
	      // NOTE(review): API failures presumably surface as rejections - confirm.
	      if (response.error) {
	        throw response.error
	      }

	      // The encoding argument only matters when audioContent is a string.
	      const buffer = Buffer.from(response.audioContent, 'base64')
	      await fsPromises.writeFile(destination, buffer)
	    }
	  } catch (err) {
	    console.error(err)
	    // Let shells and CI notice the failure instead of exiting with status 0.
	    process.exitCode = 1
	  }
	}

	/**
	 * Builds the output file name `<two-digit index>.<title>.mp3` for one line.
	 *
	 * Slashes in the text are replaced with hyphens so that the title cannot
	 * introduce path separators. The title is cut at a character boundary so
	 * that the whole name stays within 255 bytes of UTF-8, the usual file name
	 * limit; names that already fit are left unchanged.
	 *
	 * @param {number} index - Zero-based position of the line.
	 * @param {string} text - The line being converted.
	 * @returns {string} The file name, without a directory.
	 */
	function buildBasename (index, text) {
	  const maxBytes = 255
	  const prefix = `${String(index).padStart(2, '0')}.`
	  const suffix = '.mp3'
	  const budget = maxBytes - Buffer.byteLength(prefix) - Buffer.byteLength(suffix)

	  let title = ''
	  let used = 0

	  // Iterating the string yields whole code points, so a multi-byte
	  // character is never split in half.
	  for (const char of text.replace(/\//g, '-')) {
	    const size = Buffer.byteLength(char)

	    if (used + size > budget) {
	      break
	    }

	    title += char
	    used += size
	  }

	  return `${prefix}${title}${suffix}`
	}
	{
	"name": "gcp-text-to-speech",
	"version": "1.0.0",
	"description": "",
	"main": "main.js",
	"scripts": {
	"login": "gcloud auth application-default login",
	"start": "node main.mjs",
	"clean": "rm -rf output",
	"test": "echo \"Error: no test specified\" && exit 1"
	},
	"keywords": [],
	"author": "",
	"license": "ISC",
	"dependencies": {
	"@google-cloud/text-to-speech": "^3.4.0"
	}
	}