greg-randall/generate_audiobook.sh

## generate_audiobook.sh
#!/bin/bash

# takes a text file and makes an audiobook of it ex:
# ./generate_audiobook.sh input_book.txt


# a few notes,

# first, you'll need to install edge-tts (pip3 install edge-tts), sox (sudo apt-get install sox), and
# opus (sudo apt-get install opus-tools). the script also calls the 'lame' and 'uuid' commands, so
# those need to be installed too

# second, you'll have to get your ebook into txt format; calibre is helpful for this. also, some books
# need manual clean up first: for example the time machine (https://www.gutenberg.org/files/35/35-0.txt)
# has extra text, chapter listings, index, glossary, etc. that should be removed.

# third, below there's a variable for threads; that's how many simultaneous requests we make to edge-tts.
# 6 seems fastest on my machine, but try higher and see if it goes faster

# fourth, below there's a variable for 'chars_per_request'; this changes the approximate number of characters
# per file that gets sent to edge-tts. 2000-5000 seems like a good balance of size vs speed of request

# Tunables: how many edge-tts / lame jobs run in parallel, and roughly how many
# characters of text go into each request sent to edge-tts.
threads=6
chars_per_request=2000

# The book to convert is the first argument; stop early when it is missing or
# unreadable instead of failing halfway through.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'usage: %s input_book.txt\n' "${0##*/}" >&2
  exit 2
fi
if [[ ! -f "$1" || ! -r "$1" ]]; then
  printf 'error: cannot read input file: %s\n' "$1" >&2
  exit 1
fi

# Keep only the file name. $input is embedded in the name of the working copy
# and of the final .opus file, so a directory component (books/foo.txt) would
# make those names point into a directory that does not exist.
input=${1##*/}

# A unique id keeps the files of separate runs apart. An empty id would turn
# "$id-audiobook" into "-audiobook", which mkdir parses as options, so give up
# if neither generator produces one.
id=$(uuid 2>/dev/null) || id=$(uuidgen 2>/dev/null) || id=
if [[ -z "$id" ]]; then
  printf 'error: could not generate an id (is uuid or uuidgen installed?)\n' >&2
  exit 1
fi

# The splitting step reads this copy rather than the original file.
cp -- "$1" "$id-$input.txt" || exit 1

printf '%s to %s\n' "$1" "$id-$input.txt"

# The rest of the script moves and deletes files by glob in the current
# directory, so never continue unless we really are inside the work directory.
mkdir -- "$id-audiobook" || exit 1
cd -- "$id-audiobook" || exit 1

printf "\n\n\nsplitting book:\n\n"

# split_book SRC LIMIT
#
# Copies SRC line by line into book_split_00000.txt, book_split_00001.txt, ...
# in the current directory. A new file is started once the lines written to the
# current one add up to more than LIMIT characters, so each file holds roughly
# LIMIT characters and a line is never cut in half.
#
# Arguments: $1 - text file to split
#            $2 - approximate number of characters per output file
# Outputs:   the name of each output file on stdout, one per line
# Returns:   non-zero when SRC cannot be opened
split_book() {
  local src=$1 limit=$2
  local line chunk
  local char_count=0 file_count=0

  printf -v chunk 'book_split_%05d.txt' "$file_count"

  # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Write the line verbatim. An unquoted echo would glob-expand a line such
    # as "* * *" into the names of the files in this directory.
    printf '%s\n' "$line" >> "$chunk"
    char_count=$((char_count + ${#line}))

    # Over the limit: announce the finished file and move on to the next one.
    if ((char_count > limit)); then
      printf '%s\n' "$chunk"
      file_count=$((file_count + 1))
      char_count=0
      printf -v chunk 'book_split_%05d.txt' "$file_count"
    fi
  done < "$src" || return 1

  # The loop only announces files that went over the limit. A last, partly
  # filled file exists unless the book ended exactly on a rollover.
  if [[ -f "$chunk" ]]; then
    printf '%s\n' "$chunk"
  fi
}

split_book "../$id-$input.txt" "$chars_per_request"

printf "\n\n\nproccessing tts:\n\n"

# Send every split text file to edge-tts, $threads requests at a time; each
# request writes <file>.mp3 next to its text file.
# The list is NUL-delimited, so shuf needs -z to see one record per file.
# Without it the whole list is a single "line" and nothing gets shuffled.
find . -maxdepth 1 -type f -name "book_split*" -print0 \
  | shuf -z \
  | xargs -t -0 -P "$threads" -I % \
      edge-tts --write-subtitles /dev/null -f % --voice en-US-SteffanNeural --write-media %.mp3

# Sort the results into subdirectories. The trailing slash makes mv fail
# instead of renaming a file when the directory could not be created.
mkdir text-split
mv -- *.txt text-split/

mkdir audio-files
mv -- *.mp3 audio-files/

# The globs and the rm below act on the current directory, so stop if the
# audio directory is not there.
cd audio-files || exit 1

printf "\n\n\ndecoding mp3s:\n\n"

# Decode every mp3 to wav, $threads files at a time. (ffmpeg was tried for this
# step but it was introducing an odd chirping sound.)
find . -maxdepth 1 -type f -name "*.mp3" -print0 \
  | xargs -t -0 -P "$threads" -I % lame --quiet --decode %

printf "\n\n\nencoding opus:\n\n"

# The split files are numbered with five zero-padded digits, so the sorted
# order the shell gives a glob is also the reading order. An array keeps each
# name a single word, unlike parsing the output of ls.
wav_files=(*.wav)
if [[ ! -e "${wav_files[0]}" ]]; then
  printf 'error: no wav files were produced, nothing to encode\n' >&2
  exit 1
fi

# sox joins the wav files into one stream and opusenc encodes it. The result
# lands two levels up, next to the copy of the input text.
sox "${wav_files[@]}" -t wav - \
  | opusenc --downmix-mono --bitrate 32 --vbr - "../../$id-$input-full.opus"

rm -- *.wav
	#!/bin/bash

	# takes a text file and makes an audiobook of it ex:
	# ./generate_audiobook.sh input_book.txt



	# a few notes,

	# first, you'll need to install edge-tts (pip3 install edge-tts), sox (sudo apt-get install sox), and
	# opus (sudo apt-get install opus-tools). the script also calls the 'lame' and 'uuid' commands, so
	# those need to be installed too

	# second, you'll have to get your ebook into txt format; calibre is helpful for this. also, some books
	# need manual clean up first: for example the time machine (https://www.gutenberg.org/files/35/35-0.txt)
	# has extra text, chapter listings, index, glossary, etc. that should be removed.

	# third, below there's a variable for threads; that's how many simultaneous requests we make to edge-tts.
	# 6 seems fastest on my machine, but try higher and see if it goes faster

	# fourth, below there's a variable for 'chars_per_request'; this changes the approximate number of characters
	# per file that gets sent to edge-tts. 2000-5000 seems like a good balance of size vs speed of request

	# Tunables: how many edge-tts / lame jobs run in parallel, and roughly how many
	# characters of text go into each request sent to edge-tts.
	threads=6
	chars_per_request=2000

	# The book to convert is the first argument; stop early when it is missing or
	# unreadable instead of failing halfway through.
	if [[ $# -lt 1 || -z "$1" ]]; then
	  printf 'usage: %s input_book.txt\n' "${0##*/}" >&2
	  exit 2
	fi
	if [[ ! -f "$1" || ! -r "$1" ]]; then
	  printf 'error: cannot read input file: %s\n' "$1" >&2
	  exit 1
	fi

	# Keep only the file name. $input is embedded in the name of the working copy
	# and of the final .opus file, so a directory component (books/foo.txt) would
	# make those names point into a directory that does not exist.
	input=${1##*/}

	# A unique id keeps the files of separate runs apart. An empty id would turn
	# "$id-audiobook" into "-audiobook", which mkdir parses as options, so give up
	# if neither generator produces one.
	id=$(uuid 2>/dev/null) || id=$(uuidgen 2>/dev/null) || id=
	if [[ -z "$id" ]]; then
	  printf 'error: could not generate an id (is uuid or uuidgen installed?)\n' >&2
	  exit 1
	fi

	# The splitting step reads this copy rather than the original file.
	cp -- "$1" "$id-$input.txt" || exit 1

	printf '%s to %s\n' "$1" "$id-$input.txt"

	# The rest of the script moves and deletes files by glob in the current
	# directory, so never continue unless we really are inside the work directory.
	mkdir -- "$id-audiobook" || exit 1
	cd -- "$id-audiobook" || exit 1

	printf "\n\n\nsplitting book:\n\n"

	# split_book SRC LIMIT
	#
	# Copies SRC line by line into book_split_00000.txt, book_split_00001.txt, ...
	# in the current directory. A new file is started once the lines written to the
	# current one add up to more than LIMIT characters, so each file holds roughly
	# LIMIT characters and a line is never cut in half.
	#
	# Arguments: $1 - text file to split
	#            $2 - approximate number of characters per output file
	# Outputs:   the name of each output file on stdout, one per line
	# Returns:   non-zero when SRC cannot be opened
	split_book() {
	  local src=$1 limit=$2
	  local line chunk
	  local char_count=0 file_count=0

	  printf -v chunk 'book_split_%05d.txt' "$file_count"

	  # "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
	  while IFS= read -r line || [[ -n "$line" ]]; do
	    # Write the line verbatim. An unquoted echo would glob-expand a line such
	    # as "* * *" into the names of the files in this directory.
	    printf '%s\n' "$line" >> "$chunk"
	    char_count=$((char_count + ${#line}))

	    # Over the limit: announce the finished file and move on to the next one.
	    if ((char_count > limit)); then
	      printf '%s\n' "$chunk"
	      file_count=$((file_count + 1))
	      char_count=0
	      printf -v chunk 'book_split_%05d.txt' "$file_count"
	    fi
	  done < "$src" || return 1

	  # The loop only announces files that went over the limit. A last, partly
	  # filled file exists unless the book ended exactly on a rollover.
	  if [[ -f "$chunk" ]]; then
	    printf '%s\n' "$chunk"
	  fi
	}

	split_book "../$id-$input.txt" "$chars_per_request"

	printf "\n\n\nproccessing tts:\n\n"

	# Send every split text file to edge-tts, $threads requests at a time; each
	# request writes <file>.mp3 next to its text file.
	# The pipes must be real, unescaped pipes: written as \| they are handed to
	# find as literal arguments and no pipeline is built.
	# The list is NUL-delimited, so shuf needs -z to see one record per file.
	# Without it the whole list is a single "line" and nothing gets shuffled.
	find . -maxdepth 1 -type f -name "book_split*" -print0 \
	  | shuf -z \
	  | xargs -t -0 -P "$threads" -I % \
	      edge-tts --write-subtitles /dev/null -f % --voice en-US-SteffanNeural --write-media %.mp3

	# Sort the results into subdirectories. The trailing slash makes mv fail
	# instead of renaming a file when the directory could not be created.
	mkdir text-split
	mv -- *.txt text-split/

	mkdir audio-files
	mv -- *.mp3 audio-files/

	# The globs and the rm below act on the current directory, so stop if the
	# audio directory is not there.
	cd audio-files || exit 1

	printf "\n\n\ndecoding mp3s:\n\n"

	# Decode every mp3 to wav, $threads files at a time. (ffmpeg was tried for this
	# step but it was introducing an odd chirping sound.) The pipe must be a real,
	# unescaped pipe: written as \| it is passed to find as a literal argument.
	find . -maxdepth 1 -type f -name "*.mp3" -print0 \
	  | xargs -t -0 -P "$threads" -I % lame --quiet --decode %

	printf "\n\n\nencoding opus:\n\n"

	# The split files are numbered with five zero-padded digits, so the sorted
	# order the shell gives a glob is also the reading order. An array keeps each
	# name a single word, unlike parsing the output of ls.
	wav_files=(*.wav)
	if [[ ! -e "${wav_files[0]}" ]]; then
	  printf 'error: no wav files were produced, nothing to encode\n' >&2
	  exit 1
	fi

	# sox joins the wav files into one stream and opusenc encodes it. The result
	# lands two levels up, next to the copy of the input text.
	sox "${wav_files[@]}" -t wav - \
	  | opusenc --downmix-mono --bitrate 32 --vbr - "../../$id-$input-full.opus"

	rm -- *.wav