tohn/text2youtubevideo.sh

## text2youtubevideo.sh
#!/bin/bash

# requirements
# TODO: test for mbrola-voices-de{1..8}
# Besides the tools listed originally, the script also calls soxi, bc, gcc,
# md5sum and fmt further down, so check for those up front as well.
for i in wget awk mbrola aplay sox soxi gmic ffmpeg convert espeak md5sum fmt bc gcc ; do
	command -v "$i" >/dev/null 2>&1 || { echo >&2 "I require \"$i\" but it's not installed. Aborting."; exit 1; }
done

# some variables, adjust to your needs
url="http://bitimage.dyndns.org/german/MartinLuther-1912/Martin_Luther_Uebersetzung_1912.txt"
lines=100
out="$HOME/tmp/$(basename "$0" .sh)"
# create project folder (mkdir -p is a no-op if it already exists)
mkdir -p "$out" || { echo >&2 "Cannot create \"$out\". Aborting."; exit 1; }
bn="$out/$(basename "$0")"
txt="$out/martin_luther_uebersetzung_1912.txt"
concat="$bn-concat.wav"
cfile="${bn}.c"
binfile="${bn}.bin"
raw="${bn}.raw"
bgmusic="${bn}-bgmusic.wav"
thumbnail="${bn}.tmp.png"
thumbnail2="${bn}.png"
srt="${bn}.srt"
wav="${bn}.wav"
mp4="${bn}.mp4"
res_w="1920"
res_h="1080"
yt_w="1280"
yt_h="720"
debug=99

# if $txt is not available (or empty), download it
# wget -O creates its output file even when the transfer fails, so fetch
# into a scratch file and only move it into place on success; otherwise a
# failed run would leave an empty $txt that later runs accept as valid.
if [ ! -s "$txt" ] ; then
	if ! wget -q -O "$txt.part" "$url" ; then
		rm -f -- "$txt.part"
		echo >&2 "Could not download \"$url\". Aborting."
		exit 1
	fi
	mv -- "$txt.part" "$txt"
fi

# get length of $txt
txtlen=$(wc -l<"$txt")

# generate md5sum of text, and convert to rgba
# The 32 hex digits are cut into four groups of eight; every group yields
# one "r,g,b,a" quadruple (two hex digits per channel).
hex=$(md5sum "$txt" | awk '{print $1}' | tr '[:lower:]' '[:upper:]')
hex1=${hex:0:8}
hex2=${hex:8:8}
hex3=${hex:16:8}
hex4=${hex:24:8}
rgba1=$(printf "%d,%d,%d,%d" 0x"${hex1:0:2}" 0x"${hex1:2:2}" 0x"${hex1:4:2}" 0x"${hex1:6:2}")
rgba2=$(printf "%d,%d,%d,%d" 0x"${hex2:0:2}" 0x"${hex2:2:2}" 0x"${hex2:4:2}" 0x"${hex2:6:2}")
rgba3=$(printf "%d,%d,%d,%d" 0x"${hex3:0:2}" 0x"${hex3:2:2}" 0x"${hex3:4:2}" 0x"${hex3:6:2}")
rgba4=$(printf "%d,%d,%d,%d" 0x"${hex4:0:2}" 0x"${hex4:2:2}" 0x"${hex4:4:2}" 0x"${hex4:6:2}")
# convert seconds to hours:minutes:seconds,milliseconds (SRT style timestamp)
# Arguments: $1 - non-negative seconds in decimal notation; the integer or
#                 the fractional part may be missing ("12", "12.3456", ".5")
# Outputs:   "HH:MM:SS,mmm" on stdout, without a trailing newline
# Hours wrap around at 24; fractions beyond milliseconds are cut off.
function s2t() {
	local whole frac h m s
	whole=${1%%.*}
	if [[ "$1" == *.* ]] ; then
		frac=${1#*.}
		frac=${frac%%.*}
	else
		frac=""
	fi
	# bc prints values below one as ".5" (empty integer part); 10# keeps a
	# value such as "08" from being read as an invalid octal number
	whole=$((10#${whole:-0}))
	# right-pad so that ".5" means 500 ms, then keep exactly three digits
	frac="${frac}000"
	frac=${frac:0:3}
	h=$((whole/60/60%24))
	m=$((whole/60%60))
	s=$((whole%60))
	printf '%02d:%02d:%02d,%s' "$h" "$m" "$s" "$frac"
}

# read (german) text and save them to individual files
# Each of the first $lines lines (after fmt) is spoken into its own
# "$bn-espeak-NNNNN.wav" clip and gets one subtitle entry in $srt.

# Clips left over from an earlier run (e.g. one with a larger $lines) would
# be picked up by the "$bn-espeak-*.wav" glob used for concatenation.
rm -f -- "$bn"-espeak-*.wav
last=0.0
entry=0
> "$srt"
fmt -t "$txt" | cat -n | head -n "$lines" | while read -r number line ; do
	# cat -n numbers blank lines too; there is nothing to speak for them and
	# an empty duration would corrupt the running total kept in $last
	[[ -n "$line" ]] || continue
	tmpwav="$bn-espeak-$(printf '%05d' "$number").wav"
	voice=$(( (number % 6)+2 )) # only voices 2-7 available
	pitch=$(( number % 100 )) # pitch from 0-99
	# stdin of this loop is the text pipeline; keep espeak away from it
	espeak "$line" -vmb-de"$voice" -s 125 -p "$pitch" -w "$tmpwav" </dev/null
	# change bit/sample rate to something equal with sox
	sox "$tmpwav" -r 16000 -c 2 "${tmpwav}".wav && mv "${tmpwav}".wav "$tmpwav"
	# add subtitle; drop the clip if its duration cannot be determined
	l=$(soxi -D "$tmpwav")
	if [[ -z "$l" ]] ; then
		echo >&2 "Skipping line $number: no usable audio in \"$tmpwav\"."
		rm -f -- "$tmpwav" "${tmpwav}".wav
		continue
	fi
	n=$(bc <<<"$last+$l")
	# subtitle entries are numbered consecutively, independent of skipped lines
	entry=$((entry+1))
	{ echo "$entry"; echo "$(s2t "$last") --> $(s2t "$n")"; echo "$line"; echo ""; } >> "$srt"
	last="$n"
done

# concatenate the wav files
# The file list is handed to ffmpeg's concat demuxer through a process
# substitution. The listed paths are absolute, which the demuxer rejects as
# "unsafe" by default in current ffmpeg releases, hence -safe 0.
# "$bn" is quoted so a project path containing spaces still globs correctly.
ffmpeg -y -f concat -safe 0 -i <( for f in "$bn"-espeak-*.wav ; do printf "file '%s'\n" "$f" ; \
	done ) -c copy "$concat" -loglevel quiet

# create background music
# A tiny C program writes one byte per step of a counter t, computed by a
# one-line formula; the formula is chosen by $txtlen%10.
case $((txtlen%10)) in
	0) formula='t*((t>>12|t>>8)&63&t>>4)' ;;
	1) formula='(t*(t>>5|t>>8))>>(t>>16)' ;;
	2) formula='t*((t>>9|t>>13)&25&t>>6)' ;;
	3) formula='t*(t>>11&t>>8&123&t>>3)' ;;
	# this entry used to start with an extra "(" that was never closed, so
	# the generated program did not compile
	4) formula='t*(t>>8*(t>>15|t>>8)&(20|(t>>19)*5>>t|t>>3))' ;;
	5) formula='(t*5&t>>7)|(t*3&t>>10)' ;;
	6) formula='t*(t>>((t>>9|t>>8))&63&t>>4)' ;;
	7) formula='(t>>6|t|t>>(t>>16))*10+((t>>11)&7)' ;;
	8) formula='(t>>7|t|t>>6)*10+4*(t&t>>13|t>>6)' ;;
	9) formula='((t*(t>>8|t>>9)&46&t>>8))^(t&t>>13|t>>6)' ;;
esac
# Explicit types and the stdio include keep recent gcc releases (which
# reject implicit int / implicit declarations) happy; t still starts at
# argc, exactly like the former "main(t)".
printf '%s\n' \
	'#include <stdio.h>' \
	'int main(int t,char**v){for(;;t++)putchar(' \
	"$formula" \
	');}' > "$cfile"

# get length of $concat
len=$(soxi -D "$concat")
# TODO: check, if $len is enough
# (1 MiB of 16 kHz / 16 bit / stereo data is a bit more than 16 seconds)

# compile the program
if gcc -w "$cfile" -o "$binfile" >/dev/null 2>&1 ; then
	# output 1 MiB of raw audio; head -c always delivers the full byte
	# count, whereas dd may store short blocks when reading from a pipe
	"$binfile" | head -c 1048576 > "$raw"

	# convert raw audio to wav and trim it to $len
	sox -r 16000 -e signed -b 16 -c 2 -v -0.1 "$raw" "$bgmusic" trim 0 "$len"
else
	# the background music is not mixed in below, so this is not fatal
	echo >&2 "Could not compile \"$cfile\", skipping background music."
fi

# mix background music with $concat
# sox -m $bgmusic $concat $wav
# don't mix it, ignore the background music
cp "$concat" "$wav"

# add video to audio
# The picture is produced from the sound itself by ffmpeg's avectorscope
# filter at ${res_w}x${res_h}; the audio track is encoded as AAC.
scope_filter="avectorscope=s=${res_w}x${res_h}"
video_args=(
	-y -loglevel quiet
	-i "$wav"
	-filter_complex "$scope_filter"
	-acodec aac -strict -2
	"$mp4"
)
ffmpeg "${video_args[@]}"
# also nice (waveform instead of vectorscope):
#ffmpeg -y -loglevel quiet -i $wav -filter_complex \
#	"[0:a]showwaves=s=${res_w}x${res_h}:rate=25,format=yuv420p[vid]" \
#	-map "[vid]" -map 0:a -codec:v libx264 -crf 18 -preset fast \
#	-c:a aac -strict -2 -b:a 256k $mp4

# thumbnail for youtube (png, 1280x720)
# Step 1: a corner gradient whose four colours are the rgba values
# computed from the md5sum of the text.
corner_colors="$rgba1,$rgba2,$rgba3,$rgba4"
canvas="$yt_w,$yt_h,1,3"
gmic -v -"$debug" "$canvas" -gimp_corner_gradient "$corner_colors" -o "$thumbnail"

# another thumbnail generation idea (but not as beautiful)
#dd if=$txt bs=$(($yt_w*$yt_h*3)) count=1 | convert -size ${yt_w}x${yt_h} -depth 8 rgb:- $thumbnail

# Step 2: caption text, five lines separated by newlines
txt_name=$(basename "$txt")
printf -v text '%s\n%s\n%s\n%s,\n%s' \
	"The first" \
	"$lines" \
	"lines of" \
	"$txt_name" \
	"fully generated by a program."

# Step 3: render the caption on a translucent band a quarter of the
# thumbnail's height and composite it onto the bottom of the gradient.
caption_size="${yt_w}x$((yt_h/4))"
convert -background '#0008' -fill white -gravity center -size "$caption_size" \
	caption:"$text" "$thumbnail" +swap -gravity south -composite "$thumbnail2"

exit 0
	#!/bin/bash
	# NOTE(review): this indented section repeats the script above and
	# follows its `exit 0`, so it is never executed.

	# requirements
	# TODO: test for mbrola-voices-de{1..8}
	# Besides the tools listed originally, the script also calls soxi, bc,
	# gcc, md5sum and fmt further down, so check for those up front as well.
	for i in wget awk mbrola aplay sox soxi gmic ffmpeg convert espeak md5sum fmt bc gcc ; do
		command -v "$i" >/dev/null 2>&1 || { echo >&2 "I require \"$i\" but it's not installed. Aborting."; exit 1; }
	done

	# some variables, adjust to your needs
	url="http://bitimage.dyndns.org/german/MartinLuther-1912/Martin_Luther_Uebersetzung_1912.txt"
	lines=100
	out="$HOME/tmp/$(basename "$0" .sh)"
	# create project folder (mkdir -p is a no-op if it already exists)
	mkdir -p "$out" || { echo >&2 "Cannot create \"$out\". Aborting."; exit 1; }
	bn="$out/$(basename "$0")"
	txt="$out/martin_luther_uebersetzung_1912.txt"
	concat="$bn-concat.wav"
	cfile="${bn}.c"
	binfile="${bn}.bin"
	raw="${bn}.raw"
	bgmusic="${bn}-bgmusic.wav"
	thumbnail="${bn}.tmp.png"
	thumbnail2="${bn}.png"
	srt="${bn}.srt"
	wav="${bn}.wav"
	mp4="${bn}.mp4"
	res_w="1920"
	res_h="1080"
	yt_w="1280"
	yt_h="720"
	debug=99

	# if $txt is not available (or empty), download it
	# wget -O creates its output file even when the transfer fails, so fetch
	# into a scratch file and only move it into place on success.
	if [ ! -s "$txt" ] ; then
		if ! wget -q -O "$txt.part" "$url" ; then
			rm -f -- "$txt.part"
			echo >&2 "Could not download \"$url\". Aborting."
			exit 1
		fi
		mv -- "$txt.part" "$txt"
	fi

	# get length of $txt
	txtlen=$(wc -l<"$txt")

	# generate md5sum of text, and convert to rgba
	# The 32 hex digits are cut into four groups of eight; every group
	# yields one "r,g,b,a" quadruple (two hex digits per channel).
	hex=$(md5sum "$txt" | awk '{print $1}' | tr '[:lower:]' '[:upper:]')
	hex1=${hex:0:8}
	hex2=${hex:8:8}
	hex3=${hex:16:8}
	hex4=${hex:24:8}
	rgba1=$(printf "%d,%d,%d,%d" 0x"${hex1:0:2}" 0x"${hex1:2:2}" 0x"${hex1:4:2}" 0x"${hex1:6:2}")
	rgba2=$(printf "%d,%d,%d,%d" 0x"${hex2:0:2}" 0x"${hex2:2:2}" 0x"${hex2:4:2}" 0x"${hex2:6:2}")
	rgba3=$(printf "%d,%d,%d,%d" 0x"${hex3:0:2}" 0x"${hex3:2:2}" 0x"${hex3:4:2}" 0x"${hex3:6:2}")
	rgba4=$(printf "%d,%d,%d,%d" 0x"${hex4:0:2}" 0x"${hex4:2:2}" 0x"${hex4:4:2}" 0x"${hex4:6:2}")
	# convert seconds to hours:minutes:seconds,milliseconds (SRT style timestamp)
	# Arguments: $1 - non-negative seconds in decimal notation; the integer or
	#                 the fractional part may be missing ("12", "12.3456", ".5")
	# Outputs:   "HH:MM:SS,mmm" on stdout, without a trailing newline
	# Hours wrap around at 24; fractions beyond milliseconds are cut off.
	function s2t() {
		local whole frac h m s
		whole=${1%%.*}
		if [[ "$1" == *.* ]] ; then
			frac=${1#*.}
			frac=${frac%%.*}
		else
			frac=""
		fi
		# bc prints values below one as ".5" (empty integer part); 10# keeps
		# a value such as "08" from being read as an invalid octal number
		whole=$((10#${whole:-0}))
		# right-pad so that ".5" means 500 ms, then keep exactly three digits
		frac="${frac}000"
		frac=${frac:0:3}
		h=$((whole/60/60%24))
		m=$((whole/60%60))
		s=$((whole%60))
		printf '%02d:%02d:%02d,%s' "$h" "$m" "$s" "$frac"
	}

	# read (german) text and save them to individual files
	# Each of the first $lines lines (after fmt) is spoken into its own
	# "$bn-espeak-NNNNN.wav" clip and gets one subtitle entry in $srt.

	# Clips left over from an earlier run (e.g. one with a larger $lines)
	# would be picked up by the "$bn-espeak-*.wav" glob used for concatenation.
	rm -f -- "$bn"-espeak-*.wav
	last=0.0
	entry=0
	> "$srt"
	fmt -t "$txt" | cat -n | head -n "$lines" | while read -r number line ; do
		# cat -n numbers blank lines too; there is nothing to speak for them
		# and an empty duration would corrupt the running total kept in $last
		[[ -n "$line" ]] || continue
		tmpwav="$bn-espeak-$(printf '%05d' "$number").wav"
		voice=$(( (number % 6)+2 )) # only voices 2-7 available
		pitch=$(( number % 100 )) # pitch from 0-99
		# stdin of this loop is the text pipeline; keep espeak away from it
		espeak "$line" -vmb-de"$voice" -s 125 -p "$pitch" -w "$tmpwav" </dev/null
		# change bit/sample rate to something equal with sox
		sox "$tmpwav" -r 16000 -c 2 "${tmpwav}".wav && mv "${tmpwav}".wav "$tmpwav"
		# add subtitle; drop the clip if its duration cannot be determined
		l=$(soxi -D "$tmpwav")
		if [[ -z "$l" ]] ; then
			echo >&2 "Skipping line $number: no usable audio in \"$tmpwav\"."
			rm -f -- "$tmpwav" "${tmpwav}".wav
			continue
		fi
		n=$(bc <<<"$last+$l")
		# subtitle entries are numbered consecutively, independent of skipped lines
		entry=$((entry+1))
		{ echo "$entry"; echo "$(s2t "$last") --> $(s2t "$n")"; echo "$line"; echo ""; } >> "$srt"
		last="$n"
	done

	# concatenate the wav files
	# The file list is handed to ffmpeg's concat demuxer through a process
	# substitution. The listed paths are absolute, which the demuxer rejects
	# as "unsafe" by default in current ffmpeg releases, hence -safe 0.
	# "$bn" is quoted so a project path containing spaces still globs correctly.
	ffmpeg -y -f concat -safe 0 -i <( for f in "$bn"-espeak-*.wav ; do printf "file '%s'\n" "$f" ; \
		done ) -c copy "$concat" -loglevel quiet

	# create background music
	# A tiny C program writes one byte per step of a counter t, computed by
	# a one-line formula; the formula is chosen by $txtlen%10.
	case $((txtlen%10)) in
		0) formula='t*((t>>12|t>>8)&63&t>>4)' ;;
		1) formula='(t*(t>>5|t>>8))>>(t>>16)' ;;
		2) formula='t*((t>>9|t>>13)&25&t>>6)' ;;
		3) formula='t*(t>>11&t>>8&123&t>>3)' ;;
		# written without the stray leading "(" so the parentheses balance
		4) formula='t*(t>>8*(t>>15|t>>8)&(20|(t>>19)*5>>t|t>>3))' ;;
		5) formula='(t*5&t>>7)|(t*3&t>>10)' ;;
		6) formula='t*(t>>((t>>9|t>>8))&63&t>>4)' ;;
		7) formula='(t>>6|t|t>>(t>>16))*10+((t>>11)&7)' ;;
		8) formula='(t>>7|t|t>>6)*10+4*(t&t>>13|t>>6)' ;;
		9) formula='((t*(t>>8|t>>9)&46&t>>8))^(t&t>>13|t>>6)' ;;
	esac
	# Explicit types and the stdio include keep recent gcc releases (which
	# reject implicit int / implicit declarations) happy; t starts at argc.
	printf '%s\n' \
		'#include <stdio.h>' \
		'int main(int t,char**v){for(;;t++)putchar(' \
		"$formula" \
		');}' > "$cfile"

	# get length of $concat
	len=$(soxi -D "$concat")
	# TODO: check, if $len is enough
	# (1 MiB of 16 kHz / 16 bit / stereo data is a bit more than 16 seconds)

	# compile the program
	if gcc -w "$cfile" -o "$binfile" >/dev/null 2>&1 ; then
		# output 1 MiB of raw audio; head -c always delivers the full byte
		# count, whereas dd may store short blocks when reading from a pipe
		"$binfile" | head -c 1048576 > "$raw"

		# convert raw audio to wav and trim it to $len
		sox -r 16000 -e signed -b 16 -c 2 -v -0.1 "$raw" "$bgmusic" trim 0 "$len"
	else
		# the background music is not mixed in below, so this is not fatal
		echo >&2 "Could not compile \"$cfile\", skipping background music."
	fi

	# mix background music with $concat
	# sox -m $bgmusic $concat $wav
	# don't mix it, ignore the background music
	cp "$concat" "$wav"

	# add video to audio
	# The picture is produced from the sound itself by ffmpeg's avectorscope
	# filter at ${res_w}x${res_h}; the audio track is encoded as AAC.
	scope_filter="avectorscope=s=${res_w}x${res_h}"
	video_args=(
		-y -loglevel quiet
		-i "$wav"
		-filter_complex "$scope_filter"
		-acodec aac -strict -2
		"$mp4"
	)
	ffmpeg "${video_args[@]}"
	# also nice (waveform instead of vectorscope):
	#ffmpeg -y -loglevel quiet -i $wav -filter_complex \
	# "[0:a]showwaves=s=${res_w}x${res_h}:rate=25,format=yuv420p[vid]" \
	# -map "[vid]" -map 0:a -codec:v libx264 -crf 18 -preset fast \
	# -c:a aac -strict -2 -b:a 256k $mp4

	# thumbnail for youtube (png, 1280x720)
	# Step 1: a corner gradient whose four colours are the rgba values
	# computed from the md5sum of the text.
	gmic -v -"$debug" "$yt_w,$yt_h,1,3" -gimp_corner_gradient "$rgba1,$rgba2,$rgba3,$rgba4" -o "$thumbnail"

	# another thumbnail generation idea (but not as beautiful)
	#dd if=$txt bs=$(($yt_w*$yt_h*3)) count=1 | convert -size ${yt_w}x${yt_h} -depth 8 rgb:- $thumbnail

	# Step 2: caption text, five lines separated by newlines.
	# Built with printf so the indentation of this section does not end up
	# as leading tabs inside the caption.
	txt_name=$(basename "$txt")
	printf -v text '%s\n%s\n%s\n%s,\n%s' \
		"The first" \
		"$lines" \
		"lines of" \
		"$txt_name" \
		"fully generated by a program."

	# Step 3: render the caption on a translucent band a quarter of the
	# thumbnail's height and composite it onto the bottom of the gradient.
	convert -background '#0008' -fill white -gravity center -size "${yt_w}x$((yt_h/4))" \
		caption:"$text" "$thumbnail" +swap -gravity south -composite "$thumbnail2"

	exit 0