Skip to content

Instantly share code, notes, and snippets.

@ca4ti
Forked from michalfapso/speak.pl
Created August 14, 2021 15:05
Show Gist options
  • Save ca4ti/4c69424efd8b423f662fca1bce0f72a7 to your computer and use it in GitHub Desktop.
Save ca4ti/4c69424efd8b423f662fca1bce0f72a7 to your computer and use it in GitHub Desktop.
Google Text-to-Speech script for processing longer texts
#!/usr/bin/perl
#--------------------------------------------------
# Usage:
# ./speak.pl en input.txt output.mp3
#
# Prerequisites:
# sudo apt-get install libwww-perl libhtml-tree-perl sox libsox-fmt-mp3
#
# Compiling sox:
# Older versions of the sox package may lack support for the MP3 codec.
# In that case, download the sox sources from http://sox.sourceforge.net/,
# install the packages libmp3lame-dev and libmad0-dev,
# and compile sox yourself.
#
# List of language code names for Google TTS:
# af Afrikaans
# sq Albanian
# am Amharic
# ar Arabic
# hy Armenian
# az Azerbaijani
# eu Basque
# be Belarusian
# bn Bengali
# bh Bihari
# bs Bosnian
# br Breton
# bg Bulgarian
# km Cambodian
# ca Catalan
# zh-CN Chinese (Simplified)
# zh-TW Chinese (Traditional)
# co Corsican
# hr Croatian
# cs Czech
# da Danish
# nl Dutch
# en English
# eo Esperanto
# et Estonian
# fo Faroese
# tl Filipino
# fi Finnish
# fr French
# fy Frisian
# gl Galician
# ka Georgian
# de German
# el Greek
# gn Guarani
# gu Gujarati
# ha Hausa
# iw Hebrew
# hi Hindi
# hu Hungarian
# is Icelandic
# id Indonesian
# ia Interlingua
# ga Irish
# it Italian
# ja Japanese
# jw Javanese
# kn Kannada
# kk Kazakh
# rw Kinyarwanda
# rn Kirundi
# ko Korean
# ku Kurdish
# ky Kyrgyz
# lo Laothian
# la Latin
# lv Latvian
# ln Lingala
# lt Lithuanian
# mk Macedonian
# mg Malagasy
# ms Malay
# ml Malayalam
# mt Maltese
# mi Maori
# mr Marathi
# mo Moldavian
# mn Mongolian
# sr-ME Montenegrin
# ne Nepali
# no Norwegian
# nn Norwegian (Nynorsk)
# oc Occitan
# or Oriya
# om Oromo
# ps Pashto
# fa Persian
# pl Polish
# pt-BR Portuguese (Brazil)
# pt-PT Portuguese (Portugal)
# pa Punjabi
# qu Quechua
# ro Romanian
# rm Romansh
# ru Russian
# gd Scots Gaelic
# sr Serbian
# sh Serbo-Croatian
# st Sesotho
# sn Shona
# sd Sindhi
# si Sinhalese
# sk Slovak
# sl Slovenian
# so Somali
# es Spanish
# su Sundanese
# sw Swahili
# sv Swedish
# tg Tajik
# ta Tamil
# tt Tatar
# te Telugu
# th Thai
# ti Tigrinya
# to Tonga
# tr Turkish
# tk Turkmen
# tw Twi
# ug Uighur
# uk Ukrainian
# ur Urdu
# uz Uzbek
# vi Vietnamese
# cy Welsh
# xh Xhosa
# yi Yiddish
# yo Yoruba
# zu Zulu
#--------------------------------------------------
use strict;
use HTTP::Cookies;
use WWW::Mechanize;
use LWP;
use HTML::TreeBuilder;
use Data::Dumper;
$Data::Dumper::Maxdepth = 2;
# Validate the command line: exactly three arguments are required
# (language code, input text file, output MP3 file).
if (scalar(@ARGV) != 3) {
    print STDERR "Usage: $0 LANGUAGE IN.txt OUT.mp3\n";
    print STDERR "\n";
    print STDERR "Examples: \n";
    # The text is read from the file named by the second argument, not from
    # standard input, so the examples must show all three arguments.
    print STDERR " ./speak.pl en input.txt speech.mp3\n";
    print STDERR " ./speak.pl sk story.txt story.mp3\n";
    # A usage error is a failure; report it through the exit status.
    exit 1;
}
# ---------------------------------------------------------------------
# Command-line arguments
# ---------------------------------------------------------------------
my $language = $ARGV[0]; # sk | en | cs | ...
my $textfile_in = $ARGV[1];
my $all_mp3_out = $ARGV[2];

# ---------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------
my $SENTENCE_MAX_CHARACTERS = 100; # limit for google tts
# Intermediate audio files are written next to the output file.
my $TMP_DIR = "$all_mp3_out.tmp";
# NOTE(review): $RECAPTCHA_URL and $SYSTEM_WEBBROWSER are not referenced by
# any code visible in this file.
my $RECAPTCHA_URL = "http://www.google.com/sorry/?continue=http%3A%2F%2Ftranslate.google.com%2Ftranslate_tts%3Ftl=en%26q=Your+identity+was+successfuly+confirmed.";
# Pause between retries while the service answers with an HTML page.
my $RECAPTCHA_SLEEP_SECONDS = 60;
my $SYSTEM_WEBBROWSER = "firefox";
# JoinMp3s() concatenates fewer than this many files per sox invocation.
my $MAX_OPENED_FILES = 1000;

# Create the working directory. A directory left over from a previous run
# is reused; any other failure is fatal here instead of surfacing later as
# a series of unrelated write errors.
if (!-d $TMP_DIR) {
    mkdir $TMP_DIR or die("ERROR: Can not create directory '$TMP_DIR': $!");
}

# Lengths of the pauses inserted between spoken chunks. The values are
# handed to the sox "trim" effect by SilenceToMp3().
my $silence_duration_paragraphs = 0.8;
my $silence_duration_sentences = 0.2;
my $silence_duration_comma = 0.1;
my $silence_duration_brace = 0.1;
my $silence_duration_semicolon = 0.2;
my $silence_duration_words = 0.05;

# Browser-like request headers. Only the commented-out request variants in
# the GetSentenceResponse* subs refer to this list.
my @headers = (
    'Host' => 'translate.google.com',
    'User-Agent' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5',
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language' => 'en-us,en;q=0.5',
    'Accept-Encoding' => 'gzip,deflate',
    'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Keep-Alive' => '300',
    'Connection' => 'keep-alive',
);

# HTTP client used by GetSentenceResponse_CaptchaAware().
my $cookie_jar = HTTP::Cookies->new(hide_cookie2 => 1);
#$cookie_jar->clear();
#$cookie_jar->set_cookie(undef, "SESSIONID", $sessionid, "/", $domain, undef, 1, 0, undef, 1);
# autocheck => 0: failed requests do not die; callers inspect the response.
my $mech = WWW::Mechanize->new(autocheck => 0, cookie_jar => $cookie_jar);
$mech->agent_alias( 'Windows IE 6' );
$mech->add_header( "Connection" => "keep-alive" );
$mech->add_header( "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
$mech->add_header( "Accept-Language" => "sk,cs;q=0.8,en-us;q=0.5,en;q=0.3");

# Plain HTTP client used by GetSentenceResponse().
my $browser = LWP::UserAgent->new;

# ---------------------------------------------------------------------
# Global state shared by the main loop and the helper subs
# ---------------------------------------------------------------------
my $referer = "";             # URL of the previous TTS request
my @all_mp3s = ();            # audio files to concatenate, in playback order
my $sentence_idx = 0;         # running number used in temporary file names
my $tts_requests_counter = 0; # number of chunks sent to the TTS service
my $sample_rate = 0;          # sample rate of the first speech file; 0 = unknown
# Read the input text line by line, cut every line into chunks that respect
# $SENTENCE_MAX_CHARACTERS, synthesize each chunk and collect the resulting
# audio files (speech plus the pauses between chunks) in @all_mp3s, in
# playback order.

# Pause that follows a chunk, selected by the last character of the chunk's
# final word.
my %silence_duration_after = (
    '.' => $silence_duration_sentences,
    '!' => $silence_duration_sentences,
    '?' => $silence_duration_sentences,
    ',' => $silence_duration_comma,
    ';' => $silence_duration_semicolon,
    ')' => $silence_duration_brace,
);

# For each input line
open(my $in_fh, '<', $textfile_in)
    or die("ERROR: Can not open file '$textfile_in': $!");
while (my $line = <$in_fh>)
{
    chomp($line);
    print "line: $line\n";

    # Check for empty lines - paragraphs separator
    if ($line =~ /^\s*$/) {
        # Silence needs a sample rate, which is only known once the first
        # speech file exists; blank lines before that produce no pause.
        if ($sample_rate != 0) {
            push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration_paragraphs, $sample_rate);
        }
        next;
    }

    my @words = split(/\s+/, $line);
    my $sentence = "";

    # For each word. A C-style loop is required because the index is moved
    # back whenever a word has to be processed again.
    for (my $i = 0; $i < scalar(@words); $i++)
    {
        my $word = $words[$i];
        $sentence .= " $word"; # add another word to the sentence
        my $say = 0;
        my $silence_duration = 0.0;
        my ($last_char) = $word =~ /(.)\z/s; # undef for an empty word

        if (length($sentence) >= $SENTENCE_MAX_CHARACTERS) {
            # The chunk became too long: drop the word that was just added.
            my $without_word = substr($sentence, 0, length($sentence) - length($word) - 1);
            if ($without_word =~ /\S/) {
                # Speak what was collected so far and retry this word as
                # the start of the next chunk.
                $sentence = $without_word;
                $i--; # one word back
            } else {
                # The word alone exceeds the limit. Dropping it would leave
                # an empty chunk and retry the same word forever, so speak
                # the longest prefix that fits (leading space included) and
                # queue the remainder in its place.
                my $chunk_length = $SENTENCE_MAX_CHARACTERS - 2;
                $sentence = " " . substr($word, 0, $chunk_length);
                my $rest = length($word) > $chunk_length ? substr($word, $chunk_length) : "";
                if ($rest ne "") {
                    $words[$i] = $rest;
                    $i--; # process the remainder next
                }
            }
            $say = 1;
            $silence_duration = $silence_duration_words;
        }
        # If a separator was found
        elsif (defined($last_char) && exists $silence_duration_after{$last_char}) {
            $say = 1;
            $silence_duration = $silence_duration_after{$last_char};
        }
        # If there are no more words
        elsif ($i == scalar(@words) - 1) {
            $say = 1;
            $silence_duration = $silence_duration_words;
        }

        next if !$say;

        print "sentence[$tts_requests_counter]: $sentence\n";
        my $trimmed_mp3 = TrimSilence( SentenceToMp3($sentence, $sentence_idx++) );
        $tts_requests_counter++;
        $sentence = ""; # start a new sentence

        # SentenceToMp3() returns "" when no audio was received; there is
        # nothing to measure or to concatenate for such a chunk.
        next if $trimmed_mp3 eq "";

        # Ask soxi for the sample rate. The list form of open() bypasses the
        # shell, so special characters in the file name are harmless.
        open(my $soxi_fh, '-|', 'soxi', '-r', $trimmed_mp3)
            or die("ERROR: Can not run soxi on '$trimmed_mp3': $!");
        my $trimmed_mp3_sample_rate = <$soxi_fh>;
        close($soxi_fh);
        $trimmed_mp3_sample_rate = "" if !defined($trimmed_mp3_sample_rate);
        chomp($trimmed_mp3_sample_rate);
        if ($trimmed_mp3_sample_rate !~ /^\d+$/) {
            die("Error: can not determine the sample rate of '$trimmed_mp3'.");
        }

        if ($sample_rate == 0) {
            $sample_rate = $trimmed_mp3_sample_rate;
        }
        if ($sample_rate != $trimmed_mp3_sample_rate) {
            die("Error: sample rate of '$trimmed_mp3' differs from the sample rate of previous files.");
        }
        #print "trimmed_mp3_sample_rate: $trimmed_mp3_sample_rate\n";
        push @all_mp3s, $trimmed_mp3;
        push @all_mp3s, SilenceToMp3($sentence_idx++, $silence_duration, $sample_rate);
    }
}
close($in_fh);
# Concatenate all collected audio files, in order, into the output file.
# Without any input file sox can only fail, so stop with a clear message
# instead of reporting "done" for an output that was never written.
if (!@all_mp3s) {
    die("ERROR: no audio was generated from '$textfile_in', nothing to write to '$all_mp3_out'");
}
print "Concatenate: @all_mp3s\n";
print "Writing output to $all_mp3_out...";
JoinMp3s(\@all_mp3s, $all_mp3_out);
print "done\n";
# Concatenates a list of audio files into a single file using sox.
#
# Arguments:
#   1. reference to an array of input file names, in playback order
#   2. name of the file to write
#   3. recursion level (optional, defaults to 0); it only appears in the
#      names of the intermediate files
#
# sox is given all inputs on one command line, which becomes a problem
# when the number of files exceeds the maximum number of files a process
# may open. Lists of $MAX_OPENED_FILES or more entries are therefore joined
# in groups of $MAX_OPENED_FILES-1 files into intermediate files under
# $TMP_DIR, and the intermediate files are joined afterwards.
sub JoinMp3s() {
    my ($inputs, $target, $level) = @_;
    $level ||= 0;
    # print "JoinMp3s(".join(" ",@{$inputs}).", $target, $level)\n";

    # Small enough: a single sox call does the job.
    if (@{$inputs} < $MAX_OPENED_FILES) {
        Exec("sox @{$inputs} $target");
        return;
    }

    my $group_size = $MAX_OPENED_FILES - 1;
    my @pending = @{$inputs}; # work on a copy, the caller's list stays intact
    my @partials = ();
    while (@pending) {
        my @group = splice(@pending, 0, $group_size);
        my $partial = "$TMP_DIR/subjoin_" . $level . "_" . scalar(@partials) . ".mp3";
        JoinMp3s(\@group, $partial, $level + 1);
        push @partials, $partial;
    }

    # Join the intermediate results (recursing again if there are too many).
    JoinMp3s(\@partials, $target, $level + 1);
}
# Creates an MP3 file that contains only silence.
#
# Arguments:
#   1. index used to build the file name ("<index>_sil.mp3", zero-padded
#      to four digits, inside $TMP_DIR)
#   2. length of the silence, passed to the sox "trim" effect
#   3. sample rate of the generated file, passed to sox -r
#
# Returns the name of the generated file.
#
# The file name is built from the index argument rather than from the
# global sentence counter, and $TMP_DIR is passed to sprintf as data so a
# '%' in the directory name cannot be misread as a format directive.
# The former empty prototype "()" was dropped: it declared a sub without
# arguments although three are expected.
sub SilenceToMp3 {
    my ($idx, $duration, $sample_rate) = @_;
    my $mp3_out = sprintf("%s/%04d_sil.mp3", $TMP_DIR, $idx);
    Exec("sox -n -r $sample_rate $mp3_out trim 0.0 $duration");
    return $mp3_out;
}
# Sends one chunk of text to the TTS service and stores the returned audio.
#
# Arguments:
#   1. text to speak; at most $SENTENCE_MAX_CHARACTERS characters
#   2. index used to build the file name ("<index>.mp3", zero-padded to
#      four digits, inside $TMP_DIR)
#
# Returns the name of the written file, or "" when the service returned no
# data. Dies when the text is too long or the file can not be written.
#
# The former empty prototype "()" was dropped: it declared a sub without
# arguments although two are expected.
sub SentenceToMp3 {
    my ($sentence, $sentence_idx) = @_;

    if (length($sentence) > $SENTENCE_MAX_CHARACTERS) {
        die ("ERROR: sentence has more than $SENTENCE_MAX_CHARACTERS characters: '$sentence'");
    }

    # Encode the text for use as a query-string value. Replacing only the
    # spaces is not enough: characters such as '&', '#', '%' or '+' would
    # cut off or alter the text that reaches the service.
    utf8::encode($sentence) if utf8::is_utf8($sentence);
    $sentence =~ s/([^A-Za-z0-9\-_.~ ])/sprintf("%%%02X", ord($1))/ge;
    $sentence =~ tr/ /+/;

    # $TMP_DIR is passed as data so a '%' in it is not read as a format.
    my $mp3_out = sprintf("%s/%04d.mp3", $TMP_DIR, $sentence_idx);
    #print "mp3_out: $mp3_out\n";
    #print "http://translate.google.com/translate_tts?q=$sentence\n";
    # my $resp = GetSentenceResponse($sentence);
    my $resp = GetSentenceResponse_CaptchaAware($sentence);
    if (!defined($resp) || length($resp) == 0) {
        print "EMPTY SENTENCE: '$sentence'\n";
        return "";
    }

    # MP3 is binary data: write it unmodified and report every I/O failure.
    open(my $mp3_fh, '>', $mp3_out)
        or die("ERROR: Can not open file '$mp3_out' for writing: $!");
    binmode($mp3_fh);
    print {$mp3_fh} $resp
        or die("ERROR: Can not write to file '$mp3_out': $!");
    close($mp3_fh)
        or die("ERROR: Can not close file '$mp3_out': $!");
    return $mp3_out;
}
# Requests the spoken form of a sentence with the plain LWP client.
#
# Argument: the sentence, already encoded for use in a URL query string.
#
# Returns the body of the response. Dies when the service answers with an
# HTML page or when the request fails; in both cases the body is not audio
# and must not be stored as an MP3 file.
#
# The former empty prototype "()" was dropped: it declared a sub without
# arguments although one is expected.
sub GetSentenceResponse {
    my ($sentence) = @_;
    #my $resp = $browser->get("http://translate.google.com/translate_tts?tl=$language&q=$sentence", @headers);
    my $resp = $browser->get("http://translate.google.com/translate_tts?tl=$language&q=$sentence");
    my $content = $resp->content;
    if ($content =~ /^<!DOCTYPE/ ||
        $content =~ /^<html>/)
    {
        die("ERROR: expecting MP3 data, but got a HTML page!");
    }
    # Connection problems and HTTP error statuses may carry a non-HTML body
    # that would otherwise be mistaken for audio.
    if (!$resp->is_success) {
        die("ERROR: TTS request failed: " . $resp->status_line);
    }
    return $content;
}
# Requests the spoken form of a sentence with the WWW::Mechanize client and
# waits whenever the service answers with an HTML page instead of audio.
#
# Argument: the sentence, already encoded for use in a URL query string.
#
# Returns the body of the first response that is not an HTML page. Dies
# when such a response reports a failure, because its body is not audio.
#
# While the service keeps answering with HTML, the text of the page is
# printed and the request is repeated every $RECAPTCHA_SLEEP_SECONDS
# seconds, without an upper limit. An earlier attempt to download the
# captcha image and submit the answer typed by the user did not work and
# was unreachable; it has been removed.
#
# The former empty prototype "()" was dropped: it declared a sub without
# arguments although one is expected.
sub GetSentenceResponse_CaptchaAware {
    my ($sentence) = @_;
    my $url = "http://translate.google.com/translate_tts?tl=$language&q=$sentence";
    my $recaptcha_waiting = 0;
    print "URL: $url\n";

    while (1) {
        # Send the previously requested URL as Referer. The header has to be
        # set before the request is made; the very first request has no
        # previous URL and therefore no Referer header.
        if ($referer ne "") {
            $mech->add_header( Referer => $referer );
        }
        $mech->get($url);
        $referer = $url;
        # print "Headers:\n".Dumper($mech->dump_headers());

        my $response = $mech->response();
        my $content = $response->content();

        if ($content !~ /^<!DOCTYPE/ && $content !~ /^<html>/) {
            # Not an HTML page. A failed request still has no audio in it.
            if (!$response->is_success()) {
                die("ERROR: TTS request failed: " . $response->status_line());
            }
            last;
        }

        # HTML instead of audio: show what the page says, then wait.
        my $tree = HTML::TreeBuilder->new();
        $tree->parse_content($content);
        print "HTML response: ".$tree->as_text()."\n";
        $tree->delete(); # release the parsed tree before the next round

        if (!$recaptcha_waiting) {
            $recaptcha_waiting = 1;
            print "We have to wait\n";
        }
        PrintWaitingDot(); # flushes, so the dot is visible during the sleep
        sleep($RECAPTCHA_SLEEP_SECONDS);
    }

    if ($recaptcha_waiting) { print "\n"; }
    return $mech->response()->content();
}
# Prints a single progress dot to STDOUT.
#
# Makes STDOUT the selected output handle and switches it to autoflush, so
# the dot is written out immediately. Both settings stay in effect after
# the call.
sub PrintWaitingDot() {
    select(STDOUT);
    print STDOUT ".";
    # Enabling autoflush also flushes what is buffered so far.
    return ($| = 1);
}
# Writes a copy of an MP3 file with the sox "silence" effect applied from
# both ends.
#
# Argument: name of the input file; "" is passed through unchanged so a
# failed synthesis step does not start sox.
#
# Returns the name of the output file: the input name with ".mp3" replaced
# by "_trim.mp3".
sub TrimSilence() {
    my ($source_mp3) = @_;
    return "" if $source_mp3 eq "";

    (my $trimmed_mp3 = $source_mp3) =~ s/\.mp3$/_trim.mp3/;

    # The stages are connected through sox pipes (-p): apply "silence",
    # reverse the audio, apply "silence" again, and reverse it back.
    my @stages = (
        "sox $source_mp3 -p silence 1 0.1 -60d",
        "sox -p -p reverse",
        "sox -p -p silence 1 0.1 -60d",
        "sox -p $trimmed_mp3 reverse",
    );
    Exec("\n" . join(" \\\n| ", @stages) . "\n");

    return $trimmed_mp3;
}
# Runs a command with system() and reports failures.
#
# Argument: the command line; it is handed to system() as a single string.
#
# Returns nothing. A command that can not be started, is killed by a
# signal or exits with a non-zero status produces a warning on STDERR;
# execution continues in every case.
#
# The former empty prototype "()" was dropped: it declared a sub without
# arguments although one is expected.
sub Exec {
    my ($cmd) = @_;
    # print "exec $cmd\n";
    system $cmd;
    if ($? == -1) {
        warn "WARNING: failed to execute '$cmd': $!\n";
    } elsif ($? & 127) {
        warn sprintf("WARNING: command '%s' died with signal %d\n", $cmd, $? & 127);
    } elsif ($? != 0) {
        warn sprintf("WARNING: command '%s' failed with exit status %d\n", $cmd, $? >> 8);
    }
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment