Skip to content

Instantly share code, notes, and snippets.

@seanhandley
Created October 25, 2011 13:55
Show Gist options
  • Save seanhandley/1312798 to your computer and use it in GitHub Desktop.
Save seanhandley/1312798 to your computer and use it in GitHub Desktop.
# translate supplier content nodes
# looks horrible, gets it done
require 'google_fish'
# Helpers for machine-translating HTML: split the markup into small chunks,
# send each chunk to a translator client, and join the results back together.
class T
  # How many times a chunk is sent to the translator before giving up.
  MAX_ATTEMPTS = 4
  # Default maximum chunk length, in characters.
  CHUNK_LIMIT = 100
  # The last sentence separator (. ? ! ; :) in a string, i.e. a separator
  # that has no other separator after it.
  LAST_SEPARATOR = /[.?!;:](?![^.?!;:]*[.?!;:])/
  # A '<' that is not closed by a '>' before the next '<' or the end of string.
  BROKEN_TAG = /<(?![^<]*>)/
  # The string ends with '>' (trailing whitespace ignored). Anchored with \z
  # rather than $ so a '>' at the end of an inner line does not count.
  ENDS_WITH_TAG = />\s*\z/
  # A '>' with no '<' between it and the next '>' or the end of string
  # (for well-formed markup this is the last tag close).
  TRAILING_TAG_CLOSE = />(?![^>]*<)/

  class << self
    # Translates every chunk in order and returns the concatenated result.
    #
    # google             - translator client responding to
    #                      translate(from, to, text, :html => true)
    # target_lang        - target language passed through to the client
    # translation_chunks - Array of HTML fragments
    #
    # Returns a String.
    def translate_html_chunks(google, target_lang, translation_chunks)
      translation_chunks.map do |chunk|
        translate_html_chunk(google, target_lang, chunk)
      end.join
    end

    # Translates one chunk from English, retrying up to MAX_ATTEMPTS times.
    # Each failed attempt is logged through Rails.logger. If every attempt
    # raises a StandardError the untranslated chunk is returned, so the page
    # keeps its content.
    def translate_html_chunk(google, target_lang, chunk_to_translate)
      1.upto(MAX_ATTEMPTS) do |attempt|
        begin
          return google.translate(:en, target_lang, chunk_to_translate, :html => true)
        rescue StandardError => e
          Rails.logger.warn "googlefish - attempt #{attempt} - translation exception - #{e.inspect}"
        end
      end
      chunk_to_translate
    end

    # Splits body_html into consecutive chunks of at most `limit` characters,
    # cutting at tag and sentence boundaries where possible (see #truncate).
    # The chunks always concatenate back to the full input. nil yields [].
    #
    # A tag longer than `limit` cannot be split safely, so it is emitted
    # whole as a single oversized chunk instead of ending the loop and
    # dropping the rest of the document.
    #
    # Raises ArgumentError if limit is not positive.
    def prepare_translation_chunks(body_html, limit = CHUNK_LIMIT)
      raise ArgumentError, 'limit must be positive' unless limit > 0

      remaining = body_html.to_s
      chunks = []
      until remaining.empty?
        chunk = truncate(remaining, limit)
        # truncate returns "" when the window starts with an unclosed '<';
        # force progress so nothing is lost.
        chunk = forced_chunk(remaining, limit) if chunk.empty?
        chunks << chunk
        remaining = remaining[chunk.length..-1]
      end
      chunks
    end

    # Returns a prefix of the first `limit` characters of text, shortened so
    # that it does not end in the middle of a tag or, where possible, in the
    # middle of a sentence:
    #
    # * if the window contains an unclosed '<', everything before it;
    # * if the window ends with '>', the whole window;
    # * otherwise the window cut just after its last sentence separator or
    #   trailing tag close, whichever comes later;
    # * if neither exists, the whole window.
    def truncate(text, limit)
      window = text[0, limit]

      broken_at = window =~ BROKEN_TAG
      return window[0, broken_at] if broken_at
      return window if window =~ ENDS_WITH_TAG

      separator_at = window =~ LAST_SEPARATOR
      return window unless separator_at

      close_at = window =~ TRAILING_TAG_CLOSE
      cut_at = [separator_at, close_at].compact.max
      window[0, cut_at + 1]
    end

    private

    # Picks a non-empty chunk when text starts with a '<' that is not closed
    # inside the truncate window: the whole tag if it closes before any other
    # '<', otherwise the text up to the next '<' (capped at limit).
    def forced_chunk(text, limit)
      close_at = text.index('>')
      next_open_at = text.index('<', 1)
      if close_at && (next_open_at.nil? || close_at < next_open_at)
        text[0, close_at + 1]
      else
        text[0, [next_open_at || limit, limit].min]
      end
    end
  end
end
# Driver script: for every non-English language, create a machine-translated
# copy of each English ContentNode attached to the listed types, unless a
# translated node already exists for that attachable.
english = Language.find_by_code 'en'
languages = Language.all.reject { |l| l == english }
# Only referenced by the commented-out report/error logging below.
report_filename = "google_vs_human.csv"
error_filename = "hotel_errors.log"

puts "Please enter your google api key:"
api_key = STDIN.gets.chomp
google = GoogleFish.new(api_key)

['AvailabilitySearch::Hotel'].each do |attachable_type|
  english_phrases = ContentNode.find(:all, :conditions => { :language_id => english.id, :content_attachable_type => attachable_type })

  languages.each do |lang|
    puts "processing #{lang.name}"
    I18n.locale = lang.code

    english_phrases.each do |node|
      # NOTE(review): this lookup is hard-coded to the 'booking_conditions'
      # identifier, while the nodes created below never set an identifier and
      # english_phrases is not filtered by one - confirm this is intended.
      existing = ContentNode.find(:first, :conditions => { :identifier => 'booking_conditions', :language_id => lang.id, :content_attachable_id => node.content_attachable_id, :content_attachable_type => attachable_type })
      if existing
        #File.open(report_filename, 'a') {|f| f.write("#{node.id},'#{lang.name}','human'\n") }
        puts "skipping"
        next
      end

      begin
        puts "translating #{node.id}"
        # Build the record in memory and persist it once, after translation,
        # so a failed translation does not leave an empty row behind.
        new_node = ContentNode.new
        new_node.content_attachable_id = node.content_attachable_id
        new_node.content_attachable_type = node.content_attachable_type
        new_node.language_id = lang.id
        # Translate the ENGLISH node's content; the new node has none yet.
        html_chunks = T.prepare_translation_chunks(node.content)
        new_node.content = T.translate_html_chunks(google, lang.code.to_sym, html_chunks)
        puts new_node.content[0..100]
        puts new_node.inspect
        new_node.save!
        #File.open(report_filename, 'a') {|f| f.write("#{node.id},'#{lang.name}','google'\n") }
        puts "saved #{node.id} in #{lang.name}"
        # Pause between nodes - presumably to stay under the API rate limit.
        sleep 1
      rescue StandardError => e
        # StandardError rather than Exception, so Ctrl-C and exit still stop
        # the script instead of being swallowed per node.
        #File.open(report_filename, 'a') {|f| f.write("#{node.id},'#{lang.name}','error'\n") }
        #File.open(error_filename, 'a') {|f| f.write("#{node.id},'#{lang.name}','error','#{e.backtrace}'\n") }
        puts "Something went wrong while processing id #{node.id} for #{lang.name}: #{e.class}: #{e.message} #{e.backtrace}"
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment