Skip to content

Instantly share code, notes, and snippets.

@mkuendig
Forked from chezou/Gemfile
Last active December 31, 2016 02:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mkuendig/075f7f672f37bdb589747b91a4bf2192 to your computer and use it in GitHub Desktop.
Save mkuendig/075f7f672f37bdb589747b91a4bf2192 to your computer and use it in GitHub Desktop.
Example Ruby code using Amazon Polly
source 'https://rubygems.org'
gem 'nokogiri', '~>1.6'
gem 'aws-sdk', '~> 2'
gem 'open-uri'
gem 'ruby-progressbar'
gem "mp3info"
gem 'streamio-ffmpeg'
# The goal of this Ruby script is to feed text (for example a text file exported from an EPUB in Calibre) into AWS Polly
# and create an audiobook with chapters and subtitles.
# Subtitles do not work correctly yet.
# This script is run on OS X with the ffmpeg and MP4Box tools.
require 'aws-sdk'
require 'nokogiri'
require 'open-uri'
require 'ruby-progressbar'
require "mp3info"
require 'streamio-ffmpeg'
# Placeholder metadata for the finished audiobook; these locals are passed to
# the mp4tags command at the end of the script. Replace them with real values.
album_title  = 'albumtitle'
album_artist = 'albumartist'
album_genre  = 'albumgenre'
album_year   = 'albumyear'
# Small convenience wrapper around an Aws::Polly::Client that renders SSML to MP3 files.
class Synthesizer
  # @param region [String] AWS region the Polly client is created for
  def initialize(region='us-west-2')
    @polly = Aws::Polly::Client.new(region: region)
  end

  # Sends one SSML document to Polly and lets the client write the audio to disk.
  #
  # @param text [String] the SSML markup to synthesize (sent with text_type "ssml")
  # @param file_name [String] path handed to the client as response_target
  # @param voice_id [String] Polly voice to use; the available IDs are listed at
  #   http://docs.aws.amazon.com/polly/latest/dg/API_Voice.html
  #   (for a Japanese voice you can use "Mizuki")
  # @return whatever the client's synthesize_speech call returns
  def synthesize(text, file_name="./tmp.mp3", voice_id="Nicole")
    request = {
      response_target: file_name,
      text: text,
      output_format: "mp3",
      voice_id: voice_id,
      text_type: "ssml"
    }
    @polly.synthesize_speech(request)
  end
end
# Formats a duration in seconds as "HH:MM:SS", optionally followed by a
# fixed-width decimal fraction ("HH:MM:SS.mmm" for decimals = 3).
#
# The fraction is always zero-padded to exactly the requested number of digits,
# and a fraction that rounds up to a whole second is carried into the seconds
# field, so the result is a well-formed timestamp for every input.
#
# @param seconds [Numeric] duration in seconds
# @param decimals [Integer] number of fractional digits to append; capped at 8,
#   and no fraction is appended when it is 0 or negative
# @return [String] the formatted timestamp
def hms(seconds, decimals = 0)
  whole = seconds.floor
  decs = [decimals, 8].min

  if decs > 0
    scale = 10**decs
    fraction = ((seconds - whole) * scale).round
    # e.g. 59.9996 at 3 decimals rounds to 1000 thousandths: carry into the seconds.
    if fraction >= scale
      whole += 1
      fraction -= scale
    end
  end

  stamp = [whole / 3600, (whole / 60) % 60, whole % 60].map { |part| part.to_s.rjust(2, '0') }.join(':')
  decs > 0 ? "#{stamp}.#{fraction.to_s.rjust(decs, '0')}" : stamp
end
# Reads a plain-text source and cuts it into chunks of limited size, each made
# of whole sentence fragments, for one-request-per-chunk speech synthesis.
module TextFetcher
  # Soft upper bound, in characters, for one chunk (a single fragment longer
  # than this still becomes a chunk of its own).
  MAX_CHUNK_SIZE = 1490
  # Inserted wherever the source has a run of five or more newlines.
  CHAPTER_MARKER = "\n\n\n\n\nBook Chapter ".freeze
  # Sources with one of these schemes are fetched through open-uri.
  REMOTE_SOURCE = %r{\A(?:https?|ftp)://}i

  # Loads the text at +url+, marks chapter breaks and splits it into chunks.
  #
  # Processing steps:
  # * every run of five or more newlines becomes five newlines followed by
  #   "Book Chapter ";
  # * every "&" is replaced by "and";
  # * the text is split after ", ", "? ", ". " and "! ", and the fragments are
  #   packed greedily into chunks (each fragment is prefixed with one space).
  #
  # Side effect: the global $total is set to the number of chunks returned.
  #
  # @param url [String] local file path, or an http(s)/ftp URL
  # @param xpath [String] unused; kept so existing callers keep working
  # @return [Array<String>] the chunks in reading order; empty for empty input
  def self.fetch_text_from(url, xpath)
    txt = read_source(url)
    txt = txt.gsub(/\n{5,}/, CHAPTER_MARKER).gsub('&', 'and')
    puts "\n \n txt variable #{txt.inspect}" if $DEBUG

    node_texts = txt.split(/(?<=[,?.!] )\s*/)
    p node_texts.inspect if $DEBUG

    combined_texts = []
    chunk = ''.dup
    node_texts.each do |fragment|
      # Flush only a non-empty chunk, so an over-long first fragment cannot
      # produce an empty leading chunk.
      if !chunk.empty? && chunk.size + fragment.size > MAX_CHUNK_SIZE
        combined_texts << chunk
        chunk = ''.dup
      end
      chunk << " #{fragment}"
    end
    combined_texts << chunk unless chunk.empty?

    $total = combined_texts.count
    puts "\n \n total variable #{$total}" if $DEBUG
    combined_texts
  end

  # Returns the full content of +source+. Local paths are read with File.read
  # rather than Kernel#open, so a string starting with "|" is never executed
  # as a command; remote sources go through open-uri.
  def self.read_source(source)
    return File.read(source) unless REMOTE_SOURCE =~ source.to_s

    # URI.open is the public open-uri entry point on newer Rubies; older
    # versions only provide the Kernel#open override.
    opener = URI.respond_to?(:open) ? URI : Kernel
    opener.open(source, &:read)
  end
  private_class_method :read_source
end
# Script entry point: synthesizes the input text chunk by chunk, writes an SRT
# subtitle file and a chapter (TOC) file alongside, joins the audio, and wraps
# everything into an .m4b audiobook using ffmpeg, MP4Box, mp4tags and mp4file.
# All generated files go to the current directory and are named after the input file.
if __FILE__ == $0
  synthesizer = Synthesizer.new
  url = "/Users/xxx/Documents/divers/scripts/aws_polly_txt/test.txt"
  # This XPath assumes any contents
  xpath = '//text()'
  base = File.basename(url)
  srt_path = "./#{base}.srt"
  toc_path = "./#{base}.toc"
  mp3_path = "./#{base}.mp3"
  mp4_path = "./#{base}.mp4"
  sub_path = "./#{base}_sub.mp4"
  m4b_path = "./#{base}.m4b"
  chunk_path = ->(index) { "./#{base}_tmp_#{index}.mp3" }

  input_texts = TextFetcher.fetch_text_from(url, xpath)
  progressbar = ProgressBar.create(:title => "Progress", :starting_at => 0, :total => input_texts.size, :format => "%a %e %P% Processed: %c from %C")

  # Create the SRT file for subtitles; mode 'w' discards any file left by an earlier run.
  File.open(srt_path, 'w') { |f| f.write("\n0 \n00:00:00,000 --> 00:00:10,000 \n Book Start \n\n0\n00:00:10,000") }

  elapsed = 0           # running audio length in seconds
  chunk_end_times = []  # chunk_end_times[i] == audio length up to and including chunk i
  chapters = []         # [chunk index, chapter label] pairs

  input_texts.each_with_index do |text, i|
    puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG
    progressbar.increment

    if text.size > 1505
      # Cut at the last whitespace at or before index 1497; fall back to a hard
      # cut when there is none, since rindex returns nil in that case.
      cut = text.rindex(/\s/, 1497) || 1497
      text = text[0, cut].rstrip + '.'
      puts "Length error fired"
      puts text.size
      puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG
    end

    chapters << [i, text[/Book Chapter \b\w*/]] if text =~ /Book Chapter /

    synthesizer.synthesize("<speak><prosody rate='slow'><p>" + text + "</p></prosody></speak>", chunk_path.call(i))
    Mp3Info.open(chunk_path.call(i)) { |mp3info| elapsed += mp3info.length }
    chunk_end_times << elapsed

    # Closes the cue opened by the previous write and opens the next one.
    stamp = hms(elapsed, 3).sub(".", ",")
    File.open(srt_path, 'a+') { |f| f.write(" --> #{stamp}\n#{text}\n\n#{i}\n#{stamp}") }
  end
  File.open(srt_path, 'a+') { |f| f.write(" --> #{hms(elapsed, 3).sub(".", ",")}\nEND\n") }
  p chapters if $DEBUG

  # Create the chapter file from the MP3 lengths recorded above.
  puts "\nCreate Table of Contents (TOC)\n"
  File.open(toc_path, 'w') do |f|
    f.write("00:00:00.000 Book Start \n")
    chapters.each do |chunk_index, label|
      # A chapter is placed at the end of the chunk that contains its heading.
      f.write("#{hms(chunk_end_times[chunk_index], 3)} #{label} \n")
    end
  end

  # Join the chunk files in numeric order. A shell glob (cat name_tmp_*.mp3)
  # sorts lexicographically, which would put chunk 10 before chunk 2 and would
  # also pick up stale chunks left by an earlier, longer run.
  File.open(mp3_path, 'wb') do |joined|
    input_texts.each_index { |i| joined.write(File.binread(chunk_path.call(i))) }
  end

  # create final mp4 file
  movie = FFMPEG::Movie.new(mp3_path)
  options = {
    audio_codec: "aac", audio_bitrate: 48, audio_sample_rate: 22050, audio_channels: 1,
    threads: 4, custom: %w( )
  }
  movie.transcode(mp4_path, options)
  `ffmpeg -i "./#{base}.mp4" -f srt -i "./#{base}.srt" -c:a copy -c:s mov_text "./#{base}_sub.mp4"`
  `MP4Box -chap ./"#{base}.toc" -add ./"#{base}_sub.mp4" -new ./"#{base}.m4b"`
  `mp4tags -album "#{album_title}" -artist "#{album_artist}" -genre "#{album_genre}" -year "#{album_year}" "./#{base}.m4b"`
  `mp4file --optimize "./#{base}.m4b"`

  # Intermediate files are removed only when the audiobook reaches this size in
  # bytes. NOTE(review): the value is so large that cleanup never runs in
  # practice - presumably left that way for debugging; lower it to enable cleanup.
  cleanup_threshold = 1000000000000000000
  # File.size? returns nil for a missing or empty file; to_i turns that into 0.
  if File.size?(m4b_path).to_i >= cleanup_threshold
    [toc_path, mp4_path, sub_path, mp3_path, srt_path].each do |path|
      File.delete(path) if File.exist?(path)
    end
    `rm ./"#{base}"_tmp_*.mp3`
  end
  `open ./"#{base}.m4b"`
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment