Skip to content

Instantly share code, notes, and snippets.

@phund
Last active June 17, 2020 04:21
Show Gist options
  • Save phund/b030fbcd726948f46fcbe246fdb6a37a to your computer and use it in GitHub Desktop.
Save phund/b030fbcd726948f46fcbe246fdb6a37a to your computer and use it in GitHub Desktop.
Tool to translate an ERB file
1. Use ruby 2.5.1
2. gem install bundler
3. bundle install
4. run
ruby translate.rb -enFilePath=[path_to_file_with_english_content] -destLangs=[comma_separated_list_of_target_languages]
Example:
ruby translate.rb -enFilePath=~/phund/tools/_privacy.html.erb -destLangs=tr,ms,sv,vi,pl,nl,th,de,da,tl,ko,ar,no,ru,pt,fr,ja,id,it,fi,zh,es,el
source 'https://rubygems.org'
gem 'mechanize'
require 'mechanize'
# Matches a string that is, in its entirety, an email address: a local part made of
# letters, digits and the listed punctuation, an "@", then one or more dot-separated
# labels of 1-63 letters/digits/hyphens that each begin and end with a letter or digit.
# translate_file uses it to skip text nodes that are email addresses.
EMAIL_VALIDATION_REGEX = %r{\A[a-zA-Z0-9.!#\$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z}.freeze
# Translates +content+ from English into +target_language+ by requesting the
# mobile Google Translate page and reading the result element out of it.
#
# @param target_language [String] language code sent as the `tl` query parameter
# @param content [String] English text to translate
# @return [String] the translated text with surrounding whitespace removed
# @raise [RuntimeError] when the response contains neither result element
def translate_text(target_language, content)
  # Both values are escaped so that neither can break the query string.
  query = "hl=en&sl=en&tl=#{CGI.escape(target_language.to_s)}&prev=_m&q=#{CGI.escape(content)}"
  page = Mechanize.new.get("https://translate.google.com/m?#{query}")

  # The result container carries dir="ltr" or dir="rtl"; try both.
  result = page.at('body div[dir="ltr"].t0') || page.at('body div[dir="rtl"].t0')
  raise "No translation found in the response for language '#{target_language}'" unless result

  result.text.strip
end
# Writes one translated copy of an HTML/ERB file per requested language.
#
# For each language the source file is parsed again, every text node that is
# not a CDATA section, blank, an email address, or starting with "http" is
# passed to translate_text, and the markup inside <body> is written next to
# the source file as "_<lang><basename>".
#
# @param file_path [String] path to the English source file; a leading "~" is expanded
# @param languages [Array<String>] language codes to translate into
# @return [Array<String>] +languages+ (the result of +each+)
def translate_file(file_path, languages)
  # Expand once so that "~" and relative paths work for reading as well as writing.
  origin_file = File.expand_path(file_path)
  origin_path = File.dirname(origin_file)
  base_name = File.basename(origin_file)

  # Entities turned back into literal characters in the output; applied in this order.
  entities = {
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&quot;' => '"',
    '&&#13;' => "\r"
  }

  languages.each do |lang|
    puts "Translating to #{lang}..."

    # Parsed again for every language because the document is modified in place.
    # The block form closes the file handle as soon as parsing is done.
    doc = File.open(origin_file) { |f| Nokogiri::HTML(f, nil, 'UTF-8') }

    doc.search('//text()').each do |node|
      next if node.name == '#cdata-section'

      content = node.content.strip
      next if content.empty? || content.start_with?('http')
      next if content.match?(EMAIL_VALIDATION_REGEX)

      translated = translate_text(lang, content)
      # Block form: a string replacement would treat "\1", "\0" or "\\" inside
      # the translation as back-references instead of literal text.
      node.replace(node.content.sub(content) { translated })
    end

    data = entities.inject(doc.at('body').inner_html) do |html, (entity, char)|
      html.gsub(entity, char)
    end

    File.open(File.join(origin_path, "_#{lang}#{base_name}"), 'w+') do |f|
      # NOTE(review): CGI.unescape also turns "+" into a space; presumably it is here
      # to undo percent-encoding added during serialisation — confirm.
      f.puts(CGI.unescape(data))
    end
  end
end
# Extracts the source path and the target languages from command-line arguments.
# Both "-name=value" and "--name=value" spellings are accepted; when an option
# is given more than once the last occurrence wins.
#
# @param argv [Array<String>] raw command-line arguments
# @return [Array(String, Array<String>)] source path ('' when absent) and language codes
def parse_translate_args(argv)
  en_path = ''
  languages = []
  argv.each do |arg|
    case arg
    when /\A--?enFilePath=(.*)\z/
      en_path = Regexp.last_match(1)
    when /\A--?destLangs=(.*)\z/
      # Ignore blank entries such as the one produced by a trailing comma.
      languages = Regexp.last_match(1).split(',').map(&:strip).reject(&:empty?)
    end
  end
  [en_path, languages]
end

en_path, languages = parse_translate_args(ARGV)

if en_path.empty? || languages.empty?
  # Say why nothing happened instead of exiting silently.
  warn 'Usage: ruby translate.rb -enFilePath=PATH -destLangs=LANG[,LANG...]'
else
  translate_file(en_path, languages)
end
@trantuanckc
Copy link

Translate a YAML file

require 'mechanize'
require 'byebug'
require 'yaml'

# Matches a string that is, in its entirety, an email address: a local part made of
# letters, digits and the listed punctuation, an "@", then one or more dot-separated
# labels of 1-63 letters/digits/hyphens that each begin and end with a letter or digit.
# process_html uses it to skip text nodes that are email addresses.
EMAIL_VALIDATION_REGEX = %r{\A[a-zA-Z0-9.!#\$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z}.freeze

# Translates +content+ from English into +target_language+ using the mobile
# Google Translate page.
#
# @example
#   translate_text('es', 'We value your privacy')
#
# @param target_language [String] language code sent as the `tl` query parameter
# @param content [String] English text to translate
# @return [String] the translated text with surrounding whitespace removed
# @raise [RuntimeError] when the response contains neither result element
def translate_text(target_language, content)
  # Escape the language code as well as the text so neither can break the query string.
  language = CGI.escape(target_language.to_s)
  url = "https://translate.google.com/m?hl=en&sl=en&tl=#{language}&prev=_m&q=#{CGI.escape(content)}"
  page = Mechanize.new.get(url)

  # The result container is marked dir="ltr" or dir="rtl"; try both.
  selectors = ['body div[dir="ltr"].t0', 'body div[dir="rtl"].t0']
  result = selectors.lazy.map { |selector| page.at(selector) }.find { |node| node }
  raise "No translation found in the response for language '#{target_language}'" unless result

  result.text.strip
end

# Translates the text of an HTML fragment while leaving its markup in place.
#
# The fragment is parsed with Nokogiri, each eligible text node is sent to
# translate_text, and the inner HTML of the parsed <body> is returned with a
# handful of entities turned back into literal characters.
#
# Text nodes are left untouched when they are CDATA sections, blank, start
# with "http", or match EMAIL_VALIDATION_REGEX.
#
# @param lang [String] target language code passed through to translate_text
# @param doc [String] HTML markup to translate
# @return [String] the translated markup
def process_html(lang, doc)
  parsed = Nokogiri::HTML(doc, nil, 'UTF-8')

  parsed.search('//text()').each do |node|
    next if node.name == '#cdata-section'

    content = node.content.strip
    next if content.empty? || content.start_with?('http')
    next if content.match?(EMAIL_VALIDATION_REGEX)

    translated = translate_text(lang, content)
    # Block form: a string replacement would treat "\1", "\0" or "\\" inside
    # the translation as back-references instead of literal text.
    node.replace(node.content.sub(content) { translated })
  end

  # Entities turned back into literal characters; applied in this order.
  entities = {
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&amp;'  => '&',
    '&quot;' => '"',
    '&&#13;' => "\r"
  }

  entities.inject(parsed.at('body').inner_html) do |html, (entity, char)|
    html.gsub(entity, char)
  end
end

# Translates the "gdpr" section of a locale YAML file into other languages.
#
# For every language the source file is read again, each non-blank value under
# <source-locale>.gdpr that does not start with "http" is run through
# process_html, and the result replaces the "gdpr" key of the existing
# "<lang>.yml" in the same directory. The source locale name is the file name
# without ".yml". A failure is reported on stderr and ends the run without
# raising.
#
# @param file_path [String] path to the source locale file, e.g. "en.yml"; a leading "~" is expanded
# @param languages [Array<String>] locale codes; "<lang>.yml" must already exist with a top-level <lang> mapping
# @return [Array<String>, nil] +languages+ on success, nil after a reported failure
def translate_file(file_path, languages)
  # Expand once so that "~" and relative paths work for reading as well as writing.
  origin_file = File.expand_path(file_path)
  origin_path = File.dirname(origin_file)
  origin_lang = File.basename(origin_file, '.yml')

  languages.each do |lang|
    puts "Translating to #{lang}..."

    lang_file = File.join(origin_path, "#{lang}.yml")
    # Loaded again for every language because the strings are replaced in place below.
    doc = YAML.load(File.read(origin_file))
    lang_yml = YAML.load(File.read(lang_file))

    gdpr = doc[origin_lang]['gdpr']
    gdpr.each_value do |text|
      content = text.strip
      next if content.empty? || content.start_with?('http')

      text.replace(process_html(lang, content))
    end

    lang_yml[lang]['gdpr'] = gdpr
    # Drop only the leading document marker; a "---" line inside a value must survive.
    output = YAML.dump(lang_yml).sub(/\A---\n/, '')

    # NOTE(review): CGI.unescape also turns "+" into a space; presumably it is here
    # to undo percent-encoding added by process_html's serialisation — confirm.
    File.open(lang_file, 'w+') { |f| f.puts(CGI.unescape(output)) }

    sleep 5 # pause between languages — presumably to go easy on the translation service
  end
rescue StandardError => e
  # Best effort: report and stop, but let Interrupt and SystemExit propagate.
  warn "Translation failed: #{e.class}: #{e.message}"
end

# Extracts the source path and the target languages from command-line arguments.
# Both "-name=value" and "--name=value" spellings are accepted; when an option
# is given more than once the last occurrence wins.
#
# @param argv [Array<String>] raw command-line arguments
# @return [Array(String, Array<String>)] source path ('' when absent) and language codes
def parse_translate_args(argv)
  en_path = ''
  languages = []
  argv.each do |arg|
    case arg
    when /\A--?enFilePath=(.*)\z/
      en_path = Regexp.last_match(1)
    when /\A--?destLangs=(.*)\z/
      # Ignore blank entries such as the one produced by a trailing comma.
      languages = Regexp.last_match(1).split(',').map(&:strip).reject(&:empty?)
    end
  end
  [en_path, languages]
end

en_path, languages = parse_translate_args(ARGV)

if en_path.empty? || languages.empty?
  # Say why nothing happened instead of exiting silently.
  warn 'Usage: ruby translate.rb -enFilePath=PATH -destLangs=LANG[,LANG...]'
else
  translate_file(en_path, languages)
end

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment