Skip to content

Instantly share code, notes, and snippets.

@tohn
Last active June 15, 2016 12:44
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save tohn/eccb6550b8703bcb6b3b to your computer and use it in GitHub Desktop.
Given an input .txt file, this will output a video file, subtitles and a thumbnail for a YouTube video.
#!/bin/bash
# requirements: every external program that is invoked further down
# (gcc, bc, soxi, md5sum and fmt are checked as well, so that a missing
# tool is reported here instead of failing in the middle of the run)
# TODO: test for mbrola-voices-de{1..8}
for i in wget awk md5sum fmt bc mbrola aplay espeak sox soxi gcc gmic ffmpeg convert ; do
  if ! command -v "$i" >/dev/null 2>&1 ; then
    echo >&2 "I require \"$i\" but it's not installed. Aborting."
    exit 1
  fi
done
# --- settings, adjust to your needs ---
# text that gets downloaded and read aloud
url="http://bitimage.dyndns.org/german/MartinLuther-1912/Martin_Luther_Uebersetzung_1912.txt"
# number of (reformatted) text lines that are spoken
lines=100
# size of the rendered video
res_w=1920
res_h=1080
# size of the thumbnail
yt_w=1280
yt_h=720
# passed (negated) to "gmic -v" when the thumbnail is created
debug=99

# --- project folder, named after this script without its .sh suffix ---
out="$HOME/tmp/$(basename "$0" .sh)"
[ -d "$out" ] || mkdir -p "$out"

# --- generated files; all but $txt share the prefix <folder>/<script name> ---
bn="$out/$(basename "$0")"
txt="${out}/martin_luther_uebersetzung_1912.txt"
# speech
concat="${bn}-concat.wav"
srt="$bn.srt"
# background music
cfile="$bn.c"
binfile="$bn.bin"
raw="$bn.raw"
bgmusic="${bn}-bgmusic.wav"
# final audio and video
wav="$bn.wav"
mp4="$bn.mp4"
# thumbnail: plain gradient first, captioned version second
thumbnail="$bn.tmp.png"
thumbnail2="$bn.png"
# if $txt is missing or empty, download it
# (wget -O creates its output file even when the transfer fails, so the
# download goes to a temporary name and is only moved into place on success;
# otherwise a failed run would leave an empty $txt behind that every later
# run would accept as the text)
if [ ! -s "$txt" ] ; then
  if wget -q -O "$txt.part" "$url" && [ -s "$txt.part" ] ; then
    mv -- "$txt.part" "$txt"
  else
    rm -f -- "$txt.part"
    echo >&2 "Could not download \"$url\". Aborting."
    exit 1
  fi
fi
# get length of $txt (number of lines)
txtlen=$(wc -l <"$txt")
# fingerprint the text: its md5sum (upper-cased hex digits) is cut into four
# groups of 8 hex digits, and each group is read as one colour "R,G,B,A"
hex=$(md5sum "$txt" | awk '{print toupper($1)}')
hex1=${hex:0:8}
hex2=${hex:8:8}
hex3=${hex:16:8}
hex4=${hex:24:8}
# print the four hex byte pairs of $1 as comma separated decimal numbers
hex2rgba() {
  local quad=$1
  printf "%d,%d,%d,%d" "0x${quad:0:2}" "0x${quad:2:2}" "0x${quad:4:2}" "0x${quad:6:2}"
}
rgba1=$(hex2rgba "$hex1")
rgba2=$(hex2rgba "$hex2")
rgba3=$(hex2rgba "$hex3")
rgba4=$(hex2rgba "$hex4")
# convert seconds to hours:minutes:seconds,milliseconds (SRT timestamp)
# Arguments: $1 - non-negative number of seconds in decimal notation, with or
#                 without fraction, e.g. "12", "3.5", "61.234567" or ".5"
#                 (bc prints values below 1 without the leading zero)
# Outputs:   "HH:MM:SS,mmm" on stdout, without trailing newline; the fraction
#            is truncated (not rounded) to milliseconds, hours wrap at 24
function s2t() {
  local whole frac
  # integer part: everything in front of the first "."
  whole=${1%%.*}
  # fractional part: only present if there is a "." at all
  if [[ "$1" == *.* ]] ; then
    frac=${1#*.}
  else
    frac=""
  fi
  # an empty integer part counts as 0; 10# keeps leading zeros ("08") from
  # being read as octal
  whole=$((10#${whole:-0}))
  # right-pad with zeros before cutting to 3 digits, so ".5" becomes 500 ms
  frac="${frac}000"
  frac=${frac:0:3}
  printf '%02d:%02d:%02d,%s' \
    "$((whole/60/60%24))" "$((whole/60%60))" "$((whole%60))" "$frac"
}
# read (german) text, save every line as an individual wav file and
# collect the matching subtitle entries in $srt
last=0.0
: > "$srt"
# remove the segments of earlier runs: the concatenation step picks up
# everything matching $bn-espeak-*.wav, so leftovers (e.g. after $lines was
# lowered) would end up in the audio without having a subtitle
rm -f -- "${bn:?}"-espeak-*.wav
fmt -t "$txt" | cat -n | head -n "$lines" | while read -r number line ; do
  tmpwav="$bn-espeak-$(printf "%05d" "$number").wav"
  voice=$(( (number % 6)+2 )) # only voices 2-7 available
  pitch=$(( number % 100 )) # pitch from 0-99
  espeak "$line" -vmb-de"$voice" -s 125 -p "$pitch" -w "$tmpwav"
  # change bit/sample rate to something equal with sox
  sox "$tmpwav" -r 16000 -c 2 "${tmpwav}".wav && mv "${tmpwav}".wav "$tmpwav"
  # add subtitle; a segment without a readable duration is skipped, so that
  # the running timestamp $last stays a valid number
  l=$(soxi -D "$tmpwav") || continue
  [ -n "$l" ] || continue
  n=$(echo "$last+$l" | bc)
  # printf instead of echo: $line is arbitrary text
  printf '%s\n' "$number" "$(s2t "$last") --> $(s2t "$n")" "$line" "" >> "$srt"
  last="$n"
done
# concatenate the wav files
# (-safe 0: the generated list contains absolute paths, which the concat
# demuxer rejects as unsafe file names while its "safe" option is enabled;
# because of -loglevel quiet that failure would not even be visible)
ffmpeg -y -f concat -safe 0 -i <( for f in "$bn"-espeak-*.wav ; do
    printf "file '%s'\n" "$f"
  done ) -c copy "$concat" -loglevel quiet
# create background music
# choose from these functions based on $txtlen%10

# print formula number $1 (0-9): a C expression in t whose value is written
# as one byte of audio per step
bytebeat_formula() {
  case "$1" in
    0) printf '%s\n' "t*((t>>12|t>>8)&63&t>>4)" ;;
    1) printf '%s\n' "(t*(t>>5|t>>8))>>(t>>16)" ;;
    2) printf '%s\n' "t*((t>>9|t>>13)&25&t>>6)" ;;
    3) printf '%s\n' "t*(t>>11&t>>8&123&t>>3)" ;;
    # the stray leading "(" of this formula is gone: it was never closed
    # and made the generated program fail to compile
    4) printf '%s\n' "t*(t>>8*(t>>15|t>>8)&(20|(t>>19)*5>>t|t>>3))" ;;
    5) printf '%s\n' "(t*5&t>>7)|(t*3&t>>10)" ;;
    6) printf '%s\n' "t*(t>>((t>>9|t>>8))&63&t>>4)" ;;
    7) printf '%s\n' "(t>>6|t|t>>(t>>16))*10+((t>>11)&7)" ;;
    8) printf '%s\n' "(t>>7|t|t>>6)*10+4*(t&t>>13|t>>6)" ;;
    9) printf '%s\n' "((t*(t>>8|t>>9)&46&t>>8))^(t&t>>13|t>>6)" ;;
  esac
}
# write the generator program: an endless loop that putchar()s the formula.
# t is main's first parameter (the argument count), so it starts at 1 when
# the program is run without arguments. stdio.h and the explicit types are
# spelled out because current compilers refuse implicit int and implicit
# function declarations.
{
  printf '%s\n' "#include <stdio.h>"
  printf '%s\n' "int main(int t,char**v){for(;;t++)putchar("
  bytebeat_formula "$((txtlen%10))"
  printf '%s\n' ");}"
} > "$cfile"
# compile the program
# (compiler messages stay hidden, but a failed build is now reported instead
# of running a missing or stale $binfile)
if gcc -w "$cfile" -o "$binfile" >/dev/null 2>&1 ; then
  # output 1 MiB of raw audio
  # (head -c delivers exactly that amount; "dd bs=1024 count=1024" may count
  # short reads from the pipe as full blocks and stop too early)
  "$binfile" | head -c 1048576 > "$raw"
else
  echo >&2 "Warning: compiling \"$cfile\" failed, no background music generated."
fi
# duration of the spoken track $concat, as reported by soxi -D
len="$(soxi -D "$concat")"
# TODO: check, if $len is enough
# read $raw as 16000 Hz, signed 16 bit, 2 channel samples with volume
# factor -0.1, store it as wav and trim it to $len
raw_format=(-r 16000 -e signed -b 16 -c 2 -v -0.1)
sox "${raw_format[@]}" "$raw" "$bgmusic" trim 0 "$len"
# the background music is not mixed in, the spoken track alone becomes $wav;
# mixing both would be:
# sox -m $bgmusic $concat $wav
cp "$concat" "$wav"
# add video to audio: $wav is drawn with the avectorscope filter at
# ${res_w}x${res_h} and stored together with the aac encoded audio in $mp4
scope_filter="avectorscope=s=${res_w}x${res_h}"
ffmpeg -y -loglevel quiet -i "$wav" \
  -filter_complex "$scope_filter" \
  -acodec aac -strict -2 \
  "$mp4"
# also nice:
#ffmpeg -y -loglevel quiet -i $wav -filter_complex \
# "[0:a]showwaves=s=${res_w}x${res_h}:rate=25,format=yuv420p[vid]" \
# -map "[vid]" -map 0:a -codec:v libx264 -crf 18 -preset fast \
# -c:a aac -strict -2 -b:a 256k $mp4
# thumbnail for youtube (png, 1280x720)
# step 1: gradient image whose four corner colours are the rgba values
gmic -v -"$debug" \
  "$yt_w,$yt_h,1,3" \
  -gimp_corner_gradient "$rgba1,$rgba2,$rgba3,$rgba4" \
  -o "$thumbnail"
# another thumbnail generation idea (but not as beautiful)
#dd if=$txt bs=$(($yt_w*$yt_h*3)) count=1 | convert -size ${yt_w}x${yt_h} -depth 8 rgb:- $thumbnail
# step 2: caption text, one fragment per line
printf -v text '%s\n' "The first" "$lines" "lines of" "$(basename "$txt"),"
text+="fully generated by a program."
# step 3: render the caption into a strip of a quarter of the thumbnail
# height and composite it onto the bottom of the gradient
caption_size="${yt_w}x$((yt_h/4))"
convert -background '#0008' -fill white -gravity center -size "$caption_size" \
  caption:"$text" "$thumbnail" +swap -gravity south -composite "$thumbnail2"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment