Last active
May 9, 2024 20:06
-
-
Save greg-randall/123a30dee1fb02751cd78a0b10338a36 to your computer and use it in GitHub Desktop.
Audiobook Generator. Takes a text file, splits it up into chunks, runs them through Edge-TTS, combines the resulting files into a single wav stream, and then re-encodes that into an Opus file for minimum size/maximum quality.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Takes a text file and makes an audiobook of it, e.g.:
#   ./generate_audiobook.sh input_book.txt
# A few notes:
# First, you'll need to install edge-tts (pip3 install edge-tts), sox (sudo apt-get install sox), and
# opus (sudo apt-get install opus-tools). The script also calls `uuid` and `lame`, so both must be on your PATH.
# Second, you'll have to get your ebook into txt format; calibre is helpful for this. Also, for example, The
# Time Machine (https://www.gutenberg.org/files/35/35-0.txt) needs manual cleanup of extra text, chapter listings,
# index, glossary, etc.
# Third, below there's a variable for 'threads'; that's how many simultaneous requests we make to edge-tts.
# 6 seems fastest on my machine, but try higher and see if it goes faster.
# Fourth, below there's a variable for 'chars_per_request'; this changes the approximate number of characters per
# file that gets sent to edge-tts. 2000-5000 seems like a good balance of size vs. speed of request.
# Number of jobs xargs runs in parallel (edge-tts requests and lame decodes).
threads=6
# Approximate number of characters per chunk file sent to edge-tts.
chars_per_request=2000

# The book to convert is the first argument; refuse to continue without it,
# because every later step derives its file names from it.
if [[ $# -lt 1 || ! -f "$1" ]]; then
  printf 'usage: %s input_book.txt\n' "${0##*/}" >&2
  exit 2
fi

# Keep only the file name, so that an argument such as books/foo.txt still
# produces valid names for the working copy and the final .opus file.
input=${1##*/}

# Unique prefix for this run's working copy, work directory and output file.
# An empty id would make the mkdir/cd below fail and leave the rest of the
# script operating on the caller's directory, so treat it as fatal.
id=$(uuid 2>/dev/null) || id=$(uuidgen 2>/dev/null) || id=
if [[ -z "$id" ]]; then
  printf 'could not generate a run id; install uuid or uuidgen\n' >&2
  exit 1
fi

# Work on a copy so the original book is never touched.
cp -- "$1" "$id-$input.txt" || exit 1
printf '%s to %s\n' "$1" "$id-$input.txt"

# All intermediate files live in a per-run directory.
mkdir -- "$id-audiobook" || exit 1
cd -- "$id-audiobook" || exit 1
#######################################
# Split a text file into chunk files named book_split_NNNNN.txt in the
# current directory. Lines are appended to the current chunk until its
# running character count exceeds the limit; the next line then starts a
# new chunk, so chunks are only approximately limit-sized.
# Arguments:
#   $1 - path of the text file to split
#   $2 - approximate number of characters per chunk
# Outputs:
#   writes the name of each chunk file to stdout
# Returns:
#   0 on success, 1 if the source file cannot be read
#######################################
split_book() {
  local source_file=$1
  local limit=$2
  local line chunk_name
  local char_count=0
  local file_count=0

  if [[ ! -f "$source_file" || ! -r "$source_file" ]]; then
    printf 'cannot read %s\n' "$source_file" >&2
    return 1
  fi

  # The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    printf -v chunk_name 'book_split_%05d.txt' "$file_count"
    # Write the line verbatim: no word splitting, no glob expansion, and no
    # option parsing for lines that start with a dash.
    printf '%s\n' "$line" >> "$chunk_name"
    char_count=$(( char_count + ${#line} ))
    # Once the chunk is over the limit, report it and start the next one.
    if (( char_count > limit )); then
      printf '%s\n' "$chunk_name"
      file_count=$(( file_count + 1 ))
      char_count=0
    fi
  done < "$source_file"

  # The loop may have ended right after closing a chunk, in which case the
  # next chunk was never created; only report it when it exists.
  printf -v chunk_name 'book_split_%05d.txt' "$file_count"
  if [[ -f "$chunk_name" ]]; then
    printf '%s\n' "$chunk_name"
  fi
}

printf "\n\n\nsplitting book:\n\n"
split_book "../$id-$input.txt" "$chars_per_request"
printf "\n\n\nproccessing tts:\n\n"
# Find the split text files and send each one to edge-tts to be made into an
# mp3 named <chunk>.mp3, running up to $threads requests at once.
# shuf needs -z to treat the NUL-delimited names as separate records; without
# it the whole stream is one "line" and nothing is shuffled.
# xargs -t echoes each command before running it; -I % runs one edge-tts
# invocation per chunk file.
find . -maxdepth 1 -type f -name "book_split*" -print0 \
  | shuf -z \
  | xargs -t -0 -P "$threads" -I % \
      edge-tts --write-subtitles /dev/null -f % --voice en-US-SteffanNeural --write-media %.mp3
# File the chunk text files away so only audio is left to deal with.
mkdir text-split
mv -- *.txt text-split

# Collect the generated mp3s in their own directory. If there are none, the
# TTS step produced nothing and there is no audiobook to build.
mkdir audio-files
if ! mv -- *.mp3 audio-files; then
  printf 'no mp3 files were produced\n' >&2
  exit 1
fi

# The remaining steps glob (and finally delete) files in the current
# directory, so stop rather than run them in the wrong place.
cd audio-files || exit 1
printf "\n\n\ndecoding mp3s:\n\n"
# Decode every mp3 to wav with lame, up to $threads at a time (ffmpeg was used
# for this previously, but it introduced an odd chirping sound).
mp3_files=(*.mp3)
printf '%s\0' "${mp3_files[@]}" \
  | xargs -t -0 -P "$threads" -I % lame --quiet --decode %

printf "\n\n\nencoding opus:\n\n"
# The glob expands in sorted order, and the chunk numbers are zero-padded, so
# this is already playback order; no need to parse ls output.
wav_files=(*.wav)
if [[ ! -e "${wav_files[0]}" ]]; then
  printf 'no wav files were decoded\n' >&2
  exit 1
fi

# sox concatenates the wav files into one stream, which is piped straight into
# opusenc; the result lands next to the original input, two levels up.
sox "${wav_files[@]}" -t wav - \
  | opusenc --downmix-mono --bitrate 32 --vbr - "../../$id-$input-full.opus"

# The wavs are only an intermediate; the mp3s are kept.
rm -- "${wav_files[@]}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment