Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Converts an epub or text file to audiobook via Google Cloud TTS
#!/usr/bin/env python3
"""
To use:
1. install/set up the Google Cloud Text-to-Speech API and the dependencies listed in its Python client setup instructions
2. install pandoc and pypandoc, also tqdm
3. create and download a service_account.json ("Service account key") from the Google Cloud console
4. run GOOGLE_APPLICATION_CREDENTIALS=service_account.json python <this_script>.py book_name.epub
"""
import re
import sys
import time
from datetime import datetime as dt
from pathlib import Path

import pypandoc
from google.cloud import texttospeech
from tqdm import tqdm
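# See the Google Cloud Text-to-Speech quotas documentation for the per-minute
# request and character limits that Narrator._rate_limit checks against.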
def book_to_text(book_file):
    """Return the contents of *book_file* as plain text.

    The file is first handed to pandoc (through ``pypandoc``) and converted
    to the ``plain`` format with ``--wrap=none``. If pypandoc raises
    ``RuntimeError``, a notice is printed and the file is read directly as
    UTF-8 text instead.

    Args:
        book_file: Path of the book (e.g. an epub or a text file) to read.

    Returns:
        The text of the book as one string.
    """
    try:
        return pypandoc.convert_file(book_file, "plain", extra_args=["--wrap=none"])
    except RuntimeError:
        print("Format not recognized. Treating as plain text...")
        with open(book_file, encoding="utf-8") as book:
            # The fallback must hand the raw text back to the caller.
            return book.read()
def clean_text_chunk(text_chunk):
    """Return *text_chunk* with markup residue replaced by speakable text.

    Two substitutions are applied, in this order:

    * every underscore (left over from ``_italics_`` markers) becomes a space;
    * every run of three or more hyphens becomes the literal ``Footnote:``.

    Args:
        text_chunk: One line of book text.

    Returns:
        The cleaned line.
    """
    # Underscores only mark emphasis; a plain replace is enough for one char.
    without_italics = text_chunk.replace("_", " ")
    # A run of 3+ hyphens is treated as a footnote separator and announced.
    return re.sub(r"-{3,}", "Footnote:", without_italics)
class Narrator:
    """Turn text into MP3 audio with the Google Cloud Text-to-Speech client.

    The text is synthesized one line at a time. The instance counts the
    requests and characters it has sent during the current clock minute and
    waits for the next minute once either count exceeds its limit.
    """

    # NOTE(review): _rate_limit compares against these two limits, but their
    # definitions are missing from this copy of the file. The values below are
    # assumptions -- confirm them against the Cloud Text-to-Speech quotas.
    MAX_REQUESTS_PER_MINUTE = 200
    MAX_CHARS_PER_MINUTE = 135000

    def __init__(self, voice_name="en-US-Wavenet-F"):
        """Create the API client and the voice/audio settings.

        Args:
            voice_name: Name of the voice to synthesize with.
        """
        self.client = texttospeech.TextToSpeechClient()
        self.voice = texttospeech.types.VoiceSelectionParams(
            language_code="en-US", name=voice_name
        )
        # NOTE(review): the AudioConfig arguments are missing from this copy of
        # the file. MP3 is assumed because text_to_mp3 only accepts ".mp3"
        # destinations and concatenates the returned chunks as MP3 -- confirm.
        self.audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3
        )
        # rate limit bookkeeping: the clock minute being counted, and the
        # requests / characters sent so far within it
        self._minute = -1
        self._requests_this_minute = 0
        self._chars_this_minute = 0

    def print_voice_names(self, lang="en"):
        """Print the names of the voices available for *lang*.

        Args:
            lang: Language prefix to filter on.
        """
        print("Available voices for language {}:".format(lang))
        for voice in self.client.list_voices().voices:
            # NOTE(review): the loop body is missing from this copy of the
            # file; filtering on the voice-name prefix is an assumption based
            # on names such as "en-US-Wavenet-F" -- confirm.
            if voice.name.startswith(lang):
                print(voice.name)

    def _rate_limit(self):
        """Block while over a per-minute limit, then roll the counters over.

        When either counter exceeds its limit, sleep until the wall-clock
        minute differs from the one being counted. Whenever the wall-clock
        minute differs from the counted one, start counting a fresh minute.
        """
        over_limit = (
            self._requests_this_minute > self.MAX_REQUESTS_PER_MINUTE
            or self._chars_this_minute > self.MAX_CHARS_PER_MINUTE
        )
        if over_limit:
            # NOTE(review): the clock expression and the wait statement are
            # missing from this copy of the file; dt.now().minute and a short
            # sleep are reconstructed from the file's `dt`/`time` imports.
            while dt.now().minute == self._minute:
                time.sleep(1)
        current_minute = dt.now().minute
        if current_minute != self._minute:
            self._minute = current_minute
            self._requests_this_minute = 0
            self._chars_this_minute = 0

    def _text_chunk_to_audio_chunk(self, text_chunk):
        """Synthesize *text_chunk* and return the encoded audio bytes.

        Args:
            text_chunk: The text to speak.

        Returns:
            The ``audio_content`` of the synthesis response.
        """
        # Apply the limiter before each request; without this call the
        # counters updated below would never be checked.
        self._rate_limit()
        input_text = texttospeech.types.SynthesisInput(text=text_chunk)
        response = self.client.synthesize_speech(
            input_text, self.voice, self.audio_config
        )
        self._requests_this_minute += 1
        self._chars_this_minute += len(text_chunk)
        return response.audio_content

    def text_to_mp3(self, text, file_dest):
        """Synthesize *text* line by line and write the audio to *file_dest*.

        Args:
            text: The full text to narrate.
            file_dest: Destination path object; its suffix must be ``.mp3``.

        Raises:
            AssertionError: If *file_dest* does not end in ``.mp3``.
        """
        assert file_dest.suffix == ".mp3"
        lines = text.splitlines()
        with file_dest.open("wb") as out:
            for text_chunk in tqdm(lines, desc=file_dest.stem):
                # skip empty lines
                if not text_chunk:
                    continue
                text_chunk = clean_text_chunk(text_chunk)
                audio_chunk = self._text_chunk_to_audio_chunk(text_chunk)
                # this is fine because mp3s can be concatenated naively and still work
                out.write(audio_chunk)
def main():
    """Convert every book named on the command line to an MP3 beside it.

    With no arguments, print a usage line and exit. Otherwise each argument
    is converted to text, narrated, and written to the same path with its
    suffix replaced by ``.mp3``.
    """
    if not sys.argv[1:]:
        print(
            "Usage: GOOGLE_APPLICATION_CREDENTIALS=service_account.json {} book_name.epub".format(
                sys.argv[0]
            )
        )
        # NOTE(review): the exit statement is missing from this copy of the
        # file; a non-zero status is assumed for the "no input" case.
        sys.exit(1)

    narrator = Narrator()
    # Call narrator.print_voice_names() here to list the selectable voices.
    for book_file in sys.argv[1:]:
        text = book_to_text(book_file)
        mp3_path = Path(book_file).with_suffix(".mp3")
        narrator.text_to_mp3(text, mp3_path)
        print("Generated mp3", mp3_path)
        # The author mentions a separate script that uploads the result to
        # Overcast; it is not part of this file.
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Copy link

madebyollin commented Apr 2, 2020

It won't work with a PDF out of the box (since pandoc does not import PDFs). But you can run pdftotext to extract the text first and then run this script on the extracted text. In general, extracting comprehensible text from a PDF is a nightmare, but for simple documents it should work.

Copy link

mysticaltech commented Apr 2, 2020

Ok, thanks for the info and the script, I appreciate it! 🙏🏻

Copy link

rahulsavsani commented Sep 15, 2020

The script is awesome and works well! I want to modify the pitch in voice. Can you suggest me some code for that?

Copy link

madebyollin commented Sep 15, 2020

The script is awesome and works well! I want to modify the pitch in voice. Can you suggest me some code for that?

You can add pitch / rate parameters to the audio config (line 48 of the script) as per their documentation, e.g. `AudioConfig(audio_encoding=..., pitch=-2.0, speaking_rate=1.1)`.


Copy link

kerenon commented Nov 8, 2020

Damn this is awesome. Thank you. I think I'm gonna try to turn this into a small personal project. I spent the last hour throwing code at the wall to see what sticks, and it already can create opus albums (1 track / chapter).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment