Skip to content

Instantly share code, notes, and snippets.

@anotherdirtbag
Last active September 26, 2023 02:28
Show Gist options
  • Save anotherdirtbag/7edf6780c962f9b09b929b59ad8501c2 to your computer and use it in GitHub Desktop.
Save anotherdirtbag/7edf6780c962f9b09b929b59ad8501c2 to your computer and use it in GitHub Desktop.
A Python 3 script that reads the text of an EPUB file using either an online (Google WaveNet) or offline (SAPI) TTS engine and saves it as a series of Opus files.
import os
import re
import subprocess
import sys
import time
import zipfile
from dataclasses import dataclass

import urllib3
from bs4 import BeautifulSoup
#pip3 install --user --upgrade beautifulsoup4
# Base names of audio chunks whose synthesis failed; savetts() consults this
# set when it is called with secondpass=True.
didntconvert = set()

# TTS backend to use: 'sapi', 'google', or 'pyttsx3'.
#for sapi: pip3 install --user --upgrade git+https://github.com/DeepHorizons/tts
#for google: pip3 install --user --upgrade google-cloud-texttospeech google-cloud-storage
#   you'll also need to make a Google Cloud account and install the Cloud SDK https://cloud.google.com/sdk/
#   and occasionally run: gcloud components update
#for pyttsx3: pip3 install --user --upgrade pyttsx3 pypiwin32
ttsengine = 'google'

# All input and output paths are resolved relative to the working directory.
currentdir = os.getcwd()
epub_doc = os.path.join(currentdir, 'The Strongest System - Xin Feng.epub')
credentialsfile = os.path.join(currentdir, 'MY_SECRET_CREDENTIALS_FILE.json')
tempaudio = os.path.join(currentdir, 'tempaudiofile')

# Output folder is named after the book plus the engine, e.g. "<book> (google)".
# splitext drops the extension without assuming it is exactly ".epub".
outputdirectory = os.path.join(
    currentdir,
    os.path.splitext(os.path.basename(epub_doc))[0] + ' (' + ttsengine + ')')
# exist_ok avoids the check-then-create race of "if not exists: mkdir".
os.makedirs(outputdirectory, exist_ok=True)
# Character accounting read by savetts() for every engine, so it must exist
# even when the Google branch below is not taken. savetts() only enforces the
# quota when it is greater than 0, so 0 means "no limit" for offline engines.
charactermontlyquota = 0
totalcharacters = 0

if ttsengine == 'sapi':
    import tts.sapi
    sapivoice = tts.sapi.Sapi()
    sapivoice.set_rate(1)
    # NOTE(review): index 1 assumes at least two SAPI voices are installed.
    sapivoice.set_voice(sapivoice.get_voices()[1])
elif ttsengine == 'google':
    #pip3 install --user --upgrade google-cloud-texttospeech
    from google.cloud import texttospeech
    from google.oauth2 import service_account
    # Build the credentials object from the service-account JSON key file;
    # the client below needs it and it was previously never defined.
    credentials = service_account.Credentials.from_service_account_file(credentialsfile)
    client = texttospeech.TextToSpeechClient(credentials=credentials)
    # To inspect the available voices: print(str(client.list_voices()))
    voice = texttospeech.VoiceSelectionParams(
        language_code='en-US',
        name='en-US-Wavenet-C')
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.OGG_OPUS,
        effects_profile_id=['large-home-entertainment-class-device'])
    # Limits used by savetts(), dosplitrequests() and wait_on_requestsperminute().
    charactermontlyquota = 999900
    charactersperrequest = 4995
    requestsperminute = 300
    requestcounter = 0
    requeststart = time.time()
elif ttsengine == 'pyttsx3':
    import pyttsx3
    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    # NOTE(review): index 1 assumes at least two voices are installed.
    engine.setProperty('voice', voices[1].id)
def parsechaptername(soup):
    """Return a filename-safe chapter title taken from the <title> element(s).

    The stripped text of every element matched by ``soup.select("title")`` is
    concatenated, then every character that is not a word character, digit,
    space or hyphen is removed.

    ``get_text()`` is used instead of ``str(tag.string)`` because ``.string``
    is ``None`` for an empty or nested <title>, which previously put the
    literal text "None" into the result.

    Args:
        soup: parsed document exposing ``select()`` (a BeautifulSoup object).

    Returns:
        The sanitized title, or an empty string when there is no title text.
    """
    chaptertitle = ''.join(tag.get_text().strip() for tag in soup.select("title"))
    return re.sub(r'[^\w\d\ \-]', '', chaptertitle)
def parseepubtext(soup):
    """Return the chapter's readable text, one line per <p> element.

    Each paragraph's text is stripped and terminated with a newline. Lines that
    consist only of digits (page numbers) are then removed. The pattern needs a
    newline on both sides, so a digits-only paragraph at the very start of the
    chapter is kept.

    Args:
        soup: parsed document exposing ``select()`` (a BeautifulSoup object).

    Returns:
        The concatenated paragraph text; an empty string if there are no <p>
        elements.
    """
    # Join once instead of growing a string with += for every paragraph.
    chaptertext = ''.join(
        str(paragraph.get_text()).strip() + '\n' for paragraph in soup.select('p'))
    return re.sub(r'\n\d+\n', '\n', chaptertext)  # remove page numbers
# This worked for the EPUB file I had, but the formatting is likely different for others.
#google tts only
def dosplitrequests(chaptertext, maxchars=None):
    """Split chapter text into chunks that each stay below the request limit.

    Used by the Google branch of savetts(). Text shorter than the limit is
    returned unchanged as a single chunk. Longer text is cut after '.'
    characters and the sentences are packed greedily, so that joining the
    returned chunks reproduces ``chaptertext`` exactly. The previous version
    re-appended '.' to every piece, which added a spurious trailing period and
    let a chunk reach limit + 1 characters.

    Args:
        chaptertext: the text to split.
        maxchars: exclusive upper bound on chunk length; defaults to the
            module-level ``charactersperrequest``.

    Returns:
        List of text chunks, each shorter than the limit.

    Raises:
        SystemExit: if a single sentence is not shorter than the limit.
    """
    limit = charactersperrequest if maxchars is None else maxchars
    if len(chaptertext) < limit:
        return [chaptertext]

    splitchar = '.'
    pieces = chaptertext.split(splitchar)
    # Every piece except the last was followed by a '.' in the original text.
    sentences = [piece + splitchar for piece in pieces[:-1]]
    if pieces[-1]:
        sentences.append(pieces[-1])

    splitrequests = []
    thisrequest = ''
    for sentence in sentences:
        if len(sentence) >= limit:
            print('single lines >' + str(limit) + ' characters not supported. try spliting by spaces instead')
            sys.exit(1)
        # Start a new chunk when adding this sentence would reach the limit.
        if thisrequest and len(thisrequest) + len(sentence) >= limit:
            splitrequests.append(thisrequest)
            thisrequest = ''
        thisrequest += sentence
    if thisrequest:
        splitrequests.append(thisrequest)
    return splitrequests
#google tts only
def wait_on_requestsperminute():
    """Block until another request fits in the current one-minute window.

    Reads and updates the module-level ``requestcounter`` and ``requeststart``.
    When more than 60 seconds have passed since ``requeststart`` a new window is
    opened with the counter at 1. Otherwise the counter is incremented if it is
    still below ``requestsperminute``. If the window is full, the function
    prints 'wait 10', sleeps ten seconds and checks again.
    """
    global requestcounter, requeststart
    while True:
        current = time.time()
        window_expired = (current - requeststart) > 60
        if window_expired:
            # Open a fresh window; this call is its first request.
            requestcounter = 1
            requeststart = current
            return
        if requestcounter < requestsperminute:
            requestcounter += 1
            return
        print('wait 10')
        time.sleep(10)
# source and dst should be full paths without any double quotes
def ffmpeg(source, dst):
    """Re-encode ``source`` to a mono 32 kbit/s Opus file at ``dst``.

    Runs the ffmpeg binary found in ``<currentdir>/ffmpeg/bin``
    (``ffmpeg.exe`` on Windows, ``ffmpeg`` elsewhere), with that directory as
    the working directory. The command is passed as an argument list rather
    than a shell string, so paths containing quotes or shell metacharacters
    are handled safely. An existing ``dst`` is overwritten (``-y``).

    Args:
        source: full path of the input audio file.
        dst: full path of the output file.

    Raises:
        SystemExit: if the ffmpeg executable is not found.
    """
    # Separate components keep the path valid on non-Windows systems too.
    ffmpegpath = os.path.join(currentdir, 'ffmpeg', 'bin')
    exename = 'ffmpeg.exe' if sys.platform == 'win32' else 'ffmpeg'
    executable = os.path.join(ffmpegpath, exename)
    if not os.path.exists(executable):
        print(executable + ' not found')
        sys.exit(1)

    command = [
        executable,
        '-i', source,
        '-map', '0:a',
        # An alternate if libopus isn't available:
        #   '-c:a', 'libvorbis', '-aq', '2', '-ac', '1', '-f', 'ogg'
        '-c:a', 'libopus', '-ac', '1', '-b:a', '32k',
        '-application', 'voip', '-vbr', 'on', '-compression_level', '10',
        '-f', 'opus',
        '-y', dst,
    ]
    print(' '.join(command))
    completed = subprocess.run(command, cwd=ffmpegpath)
    if completed.returncode != 0:
        # Best effort, as before: report the failure but do not abort the run.
        print('ffmpeg exited with code ' + str(completed.returncode))
def savetts(chaptertext, audiofile, secondpass = False):
    """Synthesize ``chaptertext`` with the configured engine.

    The module-level ``totalcharacters`` counter is increased by the length of
    the text on every call. The run stops once the counter exceeds
    ``charactermontlyquota``, but only when that quota is greater than 0.

    * 'sapi': records to ``tempaudio + '.wav'`` and converts it to
      ``audiofile`` with ffmpeg().
    * 'google': splits the text with dosplitrequests() and writes one ``.opus``
      file per chunk (``<name>.<index>.opus`` when there are several chunks).
      On a first pass, chunks whose file already exists are skipped. On a
      second pass, only chunks recorded in ``didntconvert`` are retried.
    * 'pyttsx3': speaks the text; ``audiofile`` is not used by this branch.

    Args:
        chaptertext: text to synthesize.
        audiofile: full path of the target ``.opus`` file.
        secondpass: retry only the chunks that failed earlier.

    Raises:
        SystemExit: when the character quota has been exceeded.
    """
    global totalcharacters
    totalcharacters += len(chaptertext)
    print('totalcharacters ' + str(totalcharacters), end='\t\t\t\r')
    if 0 < charactermontlyquota < totalcharacters:
        print('reached character limit')
        print(os.path.basename(audiofile))
        # sys.exit() instead of quit(): quit() is only installed by the site module.
        sys.exit()

    if ttsengine == 'sapi':
        wavfile = tempaudio + '.wav'
        sapivoice.create_recording(wavfile, chaptertext)
        ffmpeg(wavfile, audiofile)
    elif ttsengine == 'google':
        splitrequests = dosplitrequests(chaptertext)
        numbered = len(splitrequests) > 1
        zfillen = len(str(len(splitrequests)))
        audiofilenoext = os.path.splitext(audiofile)[0]
        for reqindex, req in enumerate(splitrequests, start=1):
            if numbered:
                thistempaudio = audiofilenoext + '.' + str(reqindex).zfill(zfillen)
            else:
                thistempaudio = audiofilenoext
            opusfile = thistempaudio + '.opus'
            # didntconvert is keyed by base name; store and look up the same key
            # (the full path was stored before, so retries never matched).
            chunkname = os.path.basename(thistempaudio)
            if secondpass:
                wanted = chunkname in didntconvert
            else:
                wanted = not os.path.exists(opusfile)
            if not wanted:
                continue

            wait_on_requestsperminute()
            input_text = texttospeech.SynthesisInput(text=req)
            response = client.synthesize_speech(input=input_text, voice=voice, audio_config=audio_config, retry=None)
            if response:
                # The response's audio_content is binary.
                with open(opusfile, 'wb') as out:
                    out.write(response.audio_content)
                didntconvert.discard(chunkname)
                print(os.path.basename(opusfile), end='\t\t\t\t')
            else:
                print('error converting ' + thistempaudio, end='\t\t\t\t')
                didntconvert.add(chunkname)
            time.sleep(30)  # otherwise almost 1/2 of the files use a lower quality voice
    elif ttsengine == 'pyttsx3':
        engine.say(chaptertext)
        engine.runAndWait()
def epub_to_text(outputtxt):
    """Write the text of every chapter in ``epub_doc`` to one UTF-8 text file.

    Chapters are the archive members whose names end in '.html'. They are
    processed in numeric order of the name without its extension, and each is
    converted to text with parseepubtext().

    Args:
        outputtxt: path of the text file to create (written as 'utf-8-sig').

    Raises:
        ValueError: if an '.html' member name (minus extension) is not an integer.
    """
    with open(outputtxt, 'w', encoding='utf-8-sig') as foutput, \
            zipfile.ZipFile(epub_doc) as archive:
        # "archive" rather than "zip" so the builtin zip() is not shadowed.
        chapternames = sorted(
            (name[:-len('.html')] for name in archive.namelist() if name.endswith('.html')),
            key=int)
        for chaptername in chapternames:
            soup = BeautifulSoup(archive.read(chaptername + '.html'), 'html.parser')
            foutput.write(parseepubtext(soup))
if __name__ == "__main__":
    # Convert every '.html' chapter of the EPUB to audio, in numeric order of
    # the chapter file names. Output files go to outputdirectory, with the
    # chapter name zero-padded to the number of digits in the chapter count.
    with zipfile.ZipFile(epub_doc) as archive:  # not "zip": keep the builtin usable
        allnames = archive.namelist()
        print(len(allnames))
        # int() raises ValueError if a chapter file name is not purely numeric.
        zipfilenames = sorted(
            (name[:-len('.html')] for name in allnames if name.endswith('.html')),
            key=int)
        zfillen = len(str(len(zipfilenames)))
        # To convert only a range of chapters, filter on int(chapterfilename) here.
        for chapterfilename in zipfilenames:
            print(chapterfilename)
            audiofile = os.path.join(outputdirectory, chapterfilename.zfill(zfillen) + '.opus')
            soup = BeautifulSoup(archive.read(chapterfilename + '.html'), 'html.parser')
            chaptertext = parseepubtext(soup)
            savetts(chaptertext, audiofile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment