Last active
June 22, 2024 02:26
-
-
Save bathtime/8ae8303e870b2909c03f3b9332a4dd47 to your computer and use it in GitHub Desktop.
A simple Linux shell script for translating an .srt file into another language and merging both languages into an .ass file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# This program takes an .srt file, translates it, and merges both translations into an .ass file with the user's selected
# language on top of the screen and the other language at the bottom.
#
# Usage: ./transmerge.sh [source language] [target language] [language on top (en|fr|...)] [source .srt] [target .ass (optional)]
#
# ex., $ ./transmerge.sh en fr en movie.srt
#
# New file: movie.ass
#
# Above translates English to merged English+French with English displaying at the top of the screen and French the bottom.
#
# NOTE: bash is required; the script relies on [[ ]], substring expansion, $'...' quoting and C-style for loops.
#

## Print an error message on stderr and stop.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

if [[ $# -lt 4 ]]; then
  printf 'Usage: %s [source language] [target language] [language on top] [source .srt] [target .ass (optional)]\n' "${0##*/}" >&2
  exit 2
fi

source_lang=$1
target_lang=$2
top=$3            ## Language whose style is listed first in the .ass header
source_file=$4
file_saved=${5:-} ## Optional output name; derived from the source name when empty
sleep_s=12        ## Seconds to wait between two translation requests
lines=0           ## Subtitle entries found (filled in by the "Formatting" pass)
count=0           ## Subtitle entries translated so far

[[ -r "$source_file" ]] || die "cannot read source file: $source_file"

## The language that is not on top goes to the bottom
if [[ "$top" == "$source_lang" ]]; then
  bot=$target_lang
else
  bot=$source_lang
fi

## Default output name: swap a trailing .srt for .ass. A source without the .srt suffix simply gets
## .ass appended, so the output name can never be the source file itself.
[[ -n "$file_saved" ]] || file_saved="${source_file%.srt}.ass"

## Work under a time-stamped name; the end of the script strips the last dot-separated field again
file_saved="${file_saved}.$(date +"%m%d%H%M%S")"
formatted_file="${file_saved}.1"

## The cleaned-up copy of the source is only a working file: remove it however the script ends
trap 'rm -f -- "$formatted_file"' EXIT

## Character encoding as reported by file(1), e.g. "utf-8"
filetype=$(file -b --mime-encoding -- "$source_file")

if [[ "$filetype" == "utf-8" ]]; then
  ## Already UTF-8: run it through iconv, with an explicit target so the result does not depend on the locale
  iconv -f "$filetype" -t utf-8 "$source_file" > "$formatted_file" \
    || die "iconv could not read $source_file as $filetype"
else
  ## Anything else is copied by perl with UTF-8 encoded standard streams (-CS).
  ## NOTE(review): this looks like it treats the input bytes as Latin-1 — confirm for other encodings.
  perl -CS -pwe '' "$source_file" > "$formatted_file" \
    || die "perl could not copy $source_file"
fi

## Strip <i>/</i> tags, "- " dashes, asterisks and carriage returns; print at most one blank line in a row
perl -i -CS -ane 's/<i>//g; s/<\/i>//g; s/- //g; s/\*//g; s/\r//g; $n=(@F==0) ? $n+1 : 0; print if $n<=1' "$formatted_file"
## Header of the .ass file: [Script Info], one style per language in [V4+ Styles], and the column
## layout of the [Events] section. The value holds literal "\n" escape sequences (one per header
## line); they only turn into newlines when the string is printed with escape processing.
## Style "$top" uses Arial 10 with Alignment 8, style "$bot" uses Arial 18 with Alignment 2.
## NOTE(review): ASS headers are usually written with "Timer: 100.0000"; the comma is kept as found — confirm.
substart="[Script Info]\n\
ScriptType: v4.00+\n\
Collisions: Normal\n\
PlayDepth: 0\nTimer: 100,0000\n\
Video Aspect Ratio: 0\n\
WrapStyle: 0\n\
ScaledBorderAndShadow: no\n\
\n\
[V4+ Styles]\n\
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,OutlineColour,BackColour,Bold,Italic,Underline,StrikeOut,ScaleX,ScaleY,Spacing,Angle,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,Encoding\n\
Style: ${top},Arial,10,&H00F9FFFF,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,0,8,10,10,10,0\n\
Style: ${bot},Arial,18,&H00F9FFF9,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,10,10,10,0\n\
\n\
[Events]\n\
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"

## Start the output file with the header. Quoting keeps the text away from word splitting and
## filename globbing ("[Events]" is a bracket pattern) and keeps an output name with spaces intact.
printf '%b\n' "$substart" > "$file_saved"
## Percent-encode $1 byte by byte so it can be placed in a URL query string.
## The unreserved characters A-Z a-z 0-9 - . _ ~ are kept; every other byte becomes %XX.
urlencode() {
  local byte encoded=""
  ## od emits one two-digit hex token per input byte; splitting its output into words is intended
  for byte in $(printf '%s' "$1" | od -An -v -tx1 | tr 'a-f' 'A-F'); do
    case "$byte" in
      2D | 2E | 5F | 7E | 3[0-9] | 4[1-9A-F] | 5[0-9A] | 6[1-9A-F] | 7[0-9A])
        ## Unreserved character: turn the hex code back into the character itself
        printf -v byte '%b' "\\x${byte}"
        encoded+=$byte
        ;;
      *)
        encoded+="%${byte}"
        ;;
    esac
  done
  printf '%s' "$encoded"
}

## Collapse every run of whitespace in $1 into a single space and trim both ends.
## This is what an unquoted expansion would do, minus the filename globbing of "?", "*" and "[...]".
squeeze_spaces() {
  local -a words=()
  ## read returns non-zero at end of input here; the array is filled regardless
  read -r -d '' -a words <<<"$1"
  printf '%s' "${words[*]}"
}

## Add\Remove a space for question marks, exclamation marks, colons... depending on language
## $1 = language code, $2 = text. The result is written to stdout.
## “ = U+201C, ” = U+201D, ' = U+0027 , " = U+0022, « = U+00AB, » = U+00BB
fix_punctuation() {
  local lang=$1 phrase
  phrase=$(squeeze_spaces "$2")
  if [[ "$lang" == "fr" ]]; then
    printf '%s\n' "$phrase" | perl -CS -pwe 's/((?!=\w)\s(?=[!|?|;|:]))//g; s/((?<=[\w|\s])[!|?|;|:])/ $&/g; s/\N{U+00AB}(?=\S)/$& /g; s/(?=\S)\N{U+00BB}/ $&/g;'
  else
    printf '%s\n' "$phrase" | sed 's/ !/!/g; s/ ?/?/g; s/ :/:/g; s/ ;/\;/g;'
  fi
}

## Two passes over the cleaned-up subtitles. "Formatting" previews the merged lines on the terminal
## and counts the entries; "Translating" fetches each translation, appends the merged lines to the
## output file and waits $sleep_s seconds between requests.
text=""
for task in "Formatting" "Translating"; do
  printf '\n\n%s %s.srt ...\n\n\n%b\n' "$task" "${source_file%.*}" "$substart"

  ## Each entry is read as: number line, time line, text line(s), blank line
  while read -r lineNum; read -r lineTime; read -r lineText; do

    ## Only loop if no empty text is found
    while [[ $lineText =~ [^[:space:]] ]]; do
      text=$text$lineText" "
      read -r lineText
    done

    if [[ "$task" == "Translating" ]]; then
      ## The text is percent-encoded so that "&", "#", "+" or "%" in a subtitle cannot break the query string
      translation=$(wget -U "Mozilla/5.0" -q -O- "http://translate.googleapis.com/translate_a/single?client=gtx&sl=$source_lang&tl=$target_lang&dt=t&q=$(urlencode "$text")" | perl -lne 'push @a,/(?<!\,\[\[)\[\"(.*?)(?<!\\)\"/g;END{print "@a"}' | perl -CS -pwe 's/\N{U+005C}\N{U+0022}/\N{U+0022}/g;')
      #translation=$(trans -s "$source_lang" -t "$target_lang" -b "$text")
    else
      translation=$text
    fi

    text=$(fix_punctuation "$source_lang" "$text")
    translation=$(fix_punctuation "$target_lang" "$translation")

    ## Grab and incorporate timeline info. This shell command is faster than awk, sed, and cut
    ## (the substrings are taken at fixed offsets of the .srt time line)
    timeStamp="Dialogue: 0,${lineTime:1:7}.${lineTime:9:2},${lineTime:18:7}.${lineTime:26:2},"

    ## The source text takes the top style when the top language is the source language
    if [[ "$top" == "$source_lang" ]]; then
      first="${timeStamp}${top},,0000,0000,0000,,$(squeeze_spaces "$text")"
      second="${timeStamp}${bot},,0000,0000,0000,,$(squeeze_spaces "$translation")"
    else
      first="${timeStamp}${top},,0000,0000,0000,,$(squeeze_spaces "$translation")"
      second="${timeStamp}${bot},,0000,0000,0000,,$(squeeze_spaces "$text")"
    fi

    if [[ "$task" == "Translating" ]]; then
      count=$((count + 1))
      printf '%s\n%s\n' "$first" "$second" | tee -a "$file_saved"

      ## Don't sleep after last translation as it's not necessary
      if ((lines != count)); then
        percentage=$((count * 100 / lines))
        printf '%s' $'\e[s' ## Save cursor position
        ## NOTE(review): reverse video / bold (ESC[7m, ESC[1m) is never switched off below — confirm that is intended
        for ((i = sleep_s; i > 0; i--)); do
          timeleft=$(date -d "@$(((lines - count - 1) * sleep_s + i))" -u +%H:%M:%S)
          printf '\e[7m\e[1mFile: %s Logical line: %s / %s (%s%%) Real line: %s / %s Time left: %s Next translation: %ss... \e[u' \
            "$source_file" "$count" "$lines" "$percentage" "$lineNum" "$realLines" "$timeleft" "$i"
          sleep 1
        done
        ## Print spaces to delete info line then reset cursor to previous position
        printf "%-135s %s" "Translating..." $'\e[u'
      fi
    else
      lines=$((lines + 1))
      printf '%s\n%s\n' "$first" "$second"
      realLines=$lineNum
    fi
    text=""
  done < "$formatted_file"
done
## The run is complete: drop the last dot-separated field (the time stamp) from the output name,
## move the result into place and delete the cleaned-up working copy of the source.
new_file_saved=${file_saved%.*}
mv -- "$file_saved" "$new_file_saved"
rm -f -- "$formatted_file"

printf '\n\nDone!\n\n\n'

## Ready-to-paste commands for inspecting the result and the source
printf 'cat %s/%s\n' "$(pwd)" "$new_file_saved"
printf 'vi %s/%s\n' "$(pwd)" "$source_file"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How does it work?
Take an .srt file, such as this one:
sample.srt:
~ Courtesy Jules et Jim (1962)
Make the script executable:
$ chmod +x transmerge.sh
Run the script with the filename:
$ ./transmerge.sh fr en en sample.srt
The above command tells transmerge that the source language is French, that the target language is English, and that English will be displayed on top with French on the bottom.
The resulting file is:
sample.ass:
Here is an example of how a generated .ass subtitle file might look in a video player. VLC player is used in this case. Please note that not all players accept the .ass format:
Screenshot from 2020-03-25 11-25-46