Last active
June 15, 2016 12:44
-
-
Save tohn/eccb6550b8703bcb6b3b to your computer and use it in GitHub Desktop.
Given an input .txt file, this script outputs a video file, subtitles, and a thumbnail for a YouTube video.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# requirements
# Abort before doing any work when one of the external programs invoked by
# this script is not found on PATH.
# TODO: test for mbrola-voices-de{1..8}
for i in wget awk md5sum fmt bc gcc mbrola aplay sox soxi gmic ffmpeg convert espeak ; do
  command -v "$i" >/dev/null 2>&1 || { echo >&2 "I require \"$i\" but it's not installed. Aborting."; exit 1; }
done
# some variables, adjust to your needs
url="http://bitimage.dyndns.org/german/MartinLuther-1912/Martin_Luther_Uebersetzung_1912.txt"
# number of (re-formatted) text lines that are read aloud
lines=100
# project folder, named after this script without its .sh suffix
out="$HOME/tmp/$(basename "$0" .sh)"
# create project folder (mkdir -p is a no-op when it already exists);
# nothing below can work without it, so give up if it cannot be created
mkdir -p "$out" || { echo >&2 "Cannot create project folder \"$out\". Aborting."; exit 1; }
# common prefix of every generated file
bn="$out/$(basename "$0")"
txt="$out/martin_luther_uebersetzung_1912.txt"
concat="$bn-concat.wav"
cfile="${bn}.c"
binfile="${bn}.bin"
raw="${bn}.raw"
bgmusic="${bn}-bgmusic.wav"
thumbnail="${bn}.tmp.png"
thumbnail2="${bn}.png"
srt="${bn}.srt"
wav="${bn}.wav"
mp4="${bn}.mp4"
# resolution of the rendered video
res_w="1920"
res_h="1080"
# resolution of the thumbnail
yt_w="1280"
yt_h="720"
# handed to gmic as "-v -$debug"
debug=99
# if $txt is not available (missing or empty), download it
if [ ! -s "$txt" ] ; then
  # wget -O creates its output file even when the transfer fails, so fetch
  # into a temporary name and only move it into place after a successful,
  # non-empty download
  if wget -q -O "$txt.part" "$url" && [ -s "$txt.part" ] ; then
    mv -- "$txt.part" "$txt"
  else
    rm -f -- "$txt.part"
    echo >&2 "Could not download \"$url\". Aborting."
    exit 1
  fi
fi
# get length of $txt (number of lines)
txtlen=$(wc -l < "$txt")
# Convert the first eight hex digits of $1 into four comma-separated decimal
# byte values ("r,g,b,a"), printed on stdout without a trailing newline.
hex2rgba() {
  printf '%d,%d,%d,%d' "0x${1:0:2}" "0x${1:2:2}" "0x${1:4:2}" "0x${1:6:2}"
}
# generate md5sum of text, and convert to rgba:
# the 32 hex digits are split into four groups of eight, one per colour
hex=$(md5sum "$txt" | awk '{print $1}' | tr '[:lower:]' '[:upper:]')
hex1=${hex:0:8}
hex2=${hex:8:8}
hex3=${hex:16:8}
hex4=${hex:24:8}
rgba1=$(hex2rgba "$hex1")
rgba2=$(hex2rgba "$hex2")
rgba3=$(hex2rgba "$hex3")
rgba4=$(hex2rgba "$hex4")
# convert seconds to hours:minutes:seconds,milliseconds
# Arguments: $1 - non-negative duration in seconds, optionally with a decimal
#                 fraction ("75", "75.25" and ".5" are all accepted)
# Outputs:   "HH:MM:SS,mmm" on stdout, without a trailing newline
# The hour field wraps around after 24 hours.
function s2t() {
  local whole frac h m s
  # integer part; empty when the input starts with the dot (bc prints ".5")
  whole=${1%%.*}
  whole=$((10#${whole:-0}))   # force base 10 so "08" is not read as octal
  # fractional part, only if there is a dot at all
  if [[ "$1" == *.* ]] ; then
    frac=${1#*.}
    frac=${frac%%.*}
  else
    frac=""
  fi
  # milliseconds are the first three fraction digits, right-padded with zeros
  # ("5" means 500 ms, not 5 ms)
  frac="${frac}000"
  frac=${frac:0:3}
  h=$((whole/60/60%24))
  m=$((whole/60%60))
  s=$((whole%60))
  printf '%02d:%02d:%02d,%s' "$h" "$m" "$s" "$frac"
}
# read (german) text and save them to individual files
# $last holds the end time (in seconds) of the previous subtitle
last=0.0
# remove recordings left over from an earlier run; otherwise a run with a
# smaller $lines would still concatenate the old surplus files later on
rm -f -- "$bn"-espeak-*.wav
: > "$srt"
fmt -t "$txt" | cat -n | head -n "$lines" | while read -r number line ; do
  tmpwav="$bn-espeak-$(printf '%05d' "$number").wav"
  voice=$(( (number % 6)+2 )) # only voices 2-7 available
  pitch=$(( number % 100 )) # pitch from 0-99
  espeak "$line" -vmb-de"$voice" -s 125 -p "$pitch" -w "$tmpwav"
  # change bit/sample rate to something equal with sox
  sox "$tmpwav" -r 16000 -c 2 "${tmpwav}.wav"
  mv "${tmpwav}.wav" "$tmpwav"
  # duration of this recording in seconds
  l=$(soxi -D "$tmpwav" 2>/dev/null)
  # no usable recording for this line: drop it so that neither the audio nor
  # the running timestamp of the following subtitles is affected
  if [ -z "$l" ] ; then
    rm -f -- "$tmpwav"
    continue
  fi
  # add subtitle
  n=$(echo "$last+$l" | bc)
  {
    printf '%s\n' "$number"
    printf '%s --> %s\n' "$(s2t "$last")" "$(s2t "$n")"
    printf '%s\n' "$line"
    printf '\n'
  } >> "$srt"
  last="$n"
done
# concatenate the wav files
# The list handed to the concat demuxer contains absolute paths, which it
# rejects as "unsafe" by default; -safe 0 (an input option, so it has to come
# before -i) accepts them.
ffmpeg -y -f concat -safe 0 -i <( for f in "$bn"-espeak-*.wav ; do
  printf "file '%s'\n" "$f"
done ) -c copy "$concat" -loglevel quiet
# create background music
# Print the formula (a C expression in t) selected by $1, a digit from 0 to 9.
bytebeat_expr() {
  case "$1" in
    0) echo "t*((t>>12|t>>8)&63&t>>4)" ;;
    1) echo "(t*(t>>5|t>>8))>>(t>>16)" ;;
    2) echo "t*((t>>9|t>>13)&25&t>>6)" ;;
    3) echo "t*(t>>11&t>>8&123&t>>3)" ;;
    # the stray leading "(" that left this formula unbalanced is removed
    4) echo "t*(t>>8*(t>>15|t>>8)&(20|(t>>19)*5>>t|t>>3))" ;;
    5) echo "(t*5&t>>7)|(t*3&t>>10)" ;;
    6) echo "t*(t>>((t>>9|t>>8))&63&t>>4)" ;;
    7) echo "(t>>6|t|t>>(t>>16))*10+((t>>11)&7)" ;;
    8) echo "(t>>7|t|t>>6)*10+4*(t&t>>13|t>>6)" ;;
    9) echo "((t*(t>>8|t>>9)&46&t>>8))^(t&t>>13|t>>6)" ;;
  esac
}
# choose from these functions based on $txtlen%10
# main is declared with explicit types and putchar gets its header, because
# current compilers reject implicit int and implicit function declarations;
# t is still main's first parameter
{
  echo "#include <stdio.h>"
  echo "int main(int t,char**v){for(;;t++)putchar("
  bytebeat_expr "$((txtlen%10))"
  echo ");}"
} > "$cfile"
# compile the program (-w silences warnings, real errors stay visible)
gcc -w "$cfile" -o "$binfile" >/dev/null \
  || echo >&2 "Could not compile \"$cfile\", there will be no background music."
# output 1 MiB of raw audio
# (head -c instead of dd: dd counts short reads from a pipe as full blocks
# and may therefore deliver less than requested)
"$binfile" | head -c 1048576 > "$raw"
# get length of $concat (in seconds)
len=$(soxi -D "$concat")
# TODO: check, if $len is enough
# convert raw audio to wav and trim it to $len
sox -r 16000 -e signed -b 16 -c 2 -v -0.1 "$raw" "$bgmusic" trim 0 "$len"
# mix background music with $concat
# sox -m "$bgmusic" "$concat" "$wav"
# don't mix it, ignore the background music
# ($wav is the only audio source of the video, so stop if it cannot be made)
cp "$concat" "$wav" || { echo >&2 "Could not create \"$wav\". Aborting."; exit 1; }
# add video to audio
# (the picture is drawn from the audio itself by the avectorscope filter)
ffmpeg -y -loglevel quiet -i "$wav" -filter_complex \
  "avectorscope=s=${res_w}x${res_h}" -acodec aac -strict -2 "$mp4"
# also nice:
#ffmpeg -y -loglevel quiet -i "$wav" -filter_complex \
#  "[0:a]showwaves=s=${res_w}x${res_h}:rate=25,format=yuv420p[vid]" \
#  -map "[vid]" -map 0:a -codec:v libx264 -crf 18 -preset fast \
#  -c:a aac -strict -2 -b:a 256k "$mp4"
# thumbnail for youtube (png, 1280x720)
# create gradient thumbnail with rgba values
# (one colour per corner, all four derived from the md5sum of the text)
gmic -v -"$debug" "$yt_w,$yt_h,1,3" -gimp_corner_gradient "$rgba1,$rgba2,$rgba3,$rgba4" -o "$thumbnail"
# another thumbnail generation idea (but not as beautiful)
#dd if="$txt" bs=$((yt_w*yt_h*3)) count=1 | convert -size "${yt_w}x${yt_h}" -depth 8 rgb:- "$thumbnail"
# caption text
text="The first
$lines
lines of
$(basename "$txt"),
fully generated by a program."
# add caption to thumbnail
# (the caption box spans the full width and a quarter of the height and is
# composited onto the lower edge of the gradient)
convert -background '#0008' -fill white -gravity center -size "${yt_w}x$((yt_h/4))" \
  caption:"$text" "$thumbnail" +swap -gravity south -composite "$thumbnail2"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment