-
-
Save trantuanckc/014ca76150cbc79b2ce7a77b443df70e to your computer and use it in GitHub Desktop.
Tool to translate an ERB file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1. Use Ruby 2.5.1
2. gem install bundler
3. bundle install
4. Run:
ruby translate.rb -enFilePath=[path_to_file_with_english_content] -destLangs=[comma_separated_list_of_target_languages]
Example:
ruby translate.rb -enFilePath=~/phund/tools/_privacy.html.erb -destLangs=tr,ms,sv,vi,pl,nl,th,de,da,tl,ko,ar,no,ru,pt,fr,ja,id,it,fi,zh,es,el
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'https://rubygems.org'

gem 'mechanize'
# The YAML translation script does `require 'byebug'`, so it must be installable.
gem 'byebug'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'mechanize' | |
# Matches a string that consists solely of an e-mail address: the pattern is
# anchored with \A and \z, so an address embedded in a longer sentence does
# not match. Text nodes that match are skipped instead of being translated.
EMAIL_VALIDATION_REGEX = %r{\A[a-zA-Z0-9.!#\$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z}.freeze
# Translates +content+ from English into +target_language+ by fetching the
# mobile Google Translate page and reading the result element out of it.
#
# @param target_language [String] language code sent as the `tl` query parameter
# @param content [String] English text to translate
# @return [String] the translated text with surrounding whitespace stripped
# @raise [RuntimeError] when the response contains no translation element
def translate_text(target_language, content)
  query = "hl=en&sl=en&tl=#{CGI.escape(target_language.to_s)}&prev=_m&q=#{CGI.escape(content)}"
  page = Mechanize.new.get("https://translate.google.com/m?#{query}")

  # The result is looked up in a `div.t0` whose `dir` attribute is "ltr" or "rtl".
  result = page.at('body div[dir="ltr"].t0') || page.at('body div[dir="rtl"].t0')
  # Fail with a readable message instead of a NoMethodError on nil.
  raise "No translation found in the response for language '#{target_language}'" if result.nil?

  result.text.strip
end
# Translates every text node of an HTML/ERB file into each requested language
# and writes one file per language next to the source, named
# "_<lang><original basename>".
#
# Text nodes that are blank, start with "http", or consist solely of an
# e-mail address are left untouched.
#
# @param file_path [String] path of the English source file (a leading `~` is expanded)
# @param languages [Array<String>] target language codes
# @return [Array<String>] the +languages+ argument
def translate_file(file_path, languages)
  # Expand first: the shell may pass `-enFilePath=~/...` through with the tilde intact.
  origin_file = File.expand_path(file_path)
  origin_path = File.dirname(origin_file)
  base_name = File.basename(origin_file)
  # Read once up front so no file handle is left open and the file is not
  # re-read for every language.
  source_html = File.read(origin_file, encoding: 'UTF-8')

  # Entities produced when the document is serialized, mapped back to the raw
  # characters (applied in this order). Presumably this keeps ERB tags and
  # similar markup intact in the output -- TODO confirm.
  entities = {
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&quot;' => '"',
    '&#13;' => "\r"
  }

  languages.each do |lang|
    puts "Translating to #{lang}..."
    # A fresh parse per language, because the text nodes are replaced in place.
    doc = Nokogiri::HTML(source_html, nil, 'UTF-8')
    doc.search('//text()').each do |node|
      next if node.name == '#cdata-section'

      content = node.content.strip
      next if content.empty? || content.start_with?('http') || content.match(EMAIL_VALIDATION_REGEX)

      translated = translate_text(lang, content)
      # Block form: backslashes in the translation are inserted literally
      # instead of being read as back-references. Surrounding whitespace of
      # the node is preserved.
      node.replace(node.content.sub(content) { translated })
    end

    body = doc.at('body')
    html = body ? body.inner_html : ''
    decoded = entities.reduce(html) { |text, (entity, char)| text.gsub(entity, char) }

    File.open(File.join(origin_path, "_#{lang}#{base_name}"), 'w') do |f|
      # NOTE(review): CGI.unescape also turns "+" into a space; presumably it is
      # here to undo percent-escaping applied to attribute values -- confirm.
      f.puts(CGI.unescape(decoded))
    end
  end
end
# Extracts the source file path and the target languages from command-line
# arguments. Both `-name=value` and `--name=value` spellings are accepted.
#
# @param args [Array<String>] raw arguments, e.g. ARGV
# @return [Array(String, Array<String>)] the path ('' when absent) and the
#   language codes ([] when absent)
def parse_cli_args(args)
  en_path = ''
  languages = []
  args.each do |arg|
    case arg
    when /\A--?enFilePath=(.*)\z/m
      en_path = Regexp.last_match(1)
    when /\A--?destLangs=(.*)\z/m
      # Drop blanks so "tr,,ms" does not produce an empty language code.
      languages = Regexp.last_match(1).split(',').map(&:strip).reject(&:empty?)
    end
  end
  [en_path, languages]
end

en_path, languages = parse_cli_args(ARGV)
if en_path.empty? || languages.empty?
  # Say why nothing happens instead of exiting silently.
  warn 'Usage: ruby translate.rb -enFilePath=PATH -destLangs=LANG[,LANG...]'
else
  translate_file(en_path, languages)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'mechanize' | |
require 'byebug' | |
require 'yaml' | |
# Matches a string that consists solely of an e-mail address: the pattern is
# anchored with \A and \z, so an address embedded in a longer sentence does
# not match. Text nodes that match are skipped instead of being translated.
EMAIL_VALIDATION_REGEX = %r{\A[a-zA-Z0-9.!#\$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*\z}.freeze
# Translates +content+ from English into +target_language+ by fetching the
# mobile Google Translate page and reading the result element out of it.
#
# @param target_language [String] language code sent as the `tl` query parameter
# @param content [String] English text to translate
# @return [String] the translated text with surrounding whitespace stripped
# @raise [RuntimeError] when the response contains no translation element
def translate_text(target_language, content)
  query = "hl=en&sl=en&tl=#{CGI.escape(target_language.to_s)}&prev=_m&q=#{CGI.escape(content)}"
  page = Mechanize.new.get("https://translate.google.com/m?#{query}")

  # The result is looked up in a `div.t0` whose `dir` attribute is "ltr" or "rtl".
  result = page.at('body div[dir="ltr"].t0') || page.at('body div[dir="rtl"].t0')
  # Fail with a readable message instead of a NoMethodError on nil.
  raise "No translation found in the response for language '#{target_language}'" if result.nil?

  result.text.strip
end
# Translates the text nodes of an HTML snippet and returns the resulting markup.
#
# Text nodes that are blank, start with "http", or consist solely of an
# e-mail address are left untouched.
#
# @param lang [String] target language code
# @param doc [String] HTML snippet in English
# @return [String] inner HTML of the parsed <body> with the text translated and
#   serializer entities decoded; '' when the parsed document has no <body>
def process_html(lang, doc)
  parsed = Nokogiri::HTML(doc, nil, 'UTF-8')
  parsed.search('//text()').each do |node|
    next if node.name == '#cdata-section'

    content = node.content.strip
    next if content.empty? || content.start_with?('http') || content.match(EMAIL_VALIDATION_REGEX)

    translated = translate_text(lang, content)
    # Block form: backslashes in the translation are inserted literally
    # instead of being read as back-references. Surrounding whitespace of the
    # node is preserved.
    node.replace(node.content.sub(content) { translated })
  end

  body = parsed.at('body')
  return '' if body.nil?

  # Entities produced when the document is serialized, mapped back to the raw
  # characters (applied in this order).
  entities = {
    '&lt;' => '<',
    '&gt;' => '>',
    '&amp;' => '&',
    '&quot;' => '"',
    '&#13;' => "\r"
  }
  entities.reduce(body.inner_html) { |html, (entity, char)| html.gsub(entity, char) }
end
# Translates the `gdpr` section of a YAML locale file into each requested
# language and stores the result under `<lang>.gdpr` in the sibling
# "<lang>.yml" file, which must already exist in the same directory.
#
# The root key of the source file is taken from its basename ("en.yml" -> "en").
# Values that are blank, start with "http", or are not strings are copied
# unchanged. Errors are reported on stderr and stop the remaining languages.
#
# @param file_path [String] path of the source locale file (a leading `~` is expanded)
# @param languages [Array<String>] target language codes
# @return [Array<String>, nil] +languages+, or nil when an error was reported
def translate_file(file_path, languages)
  origin_file = File.expand_path(file_path)
  origin_path = File.dirname(origin_file)
  origin_lang = File.basename(origin_file, '.yml')
  # NOTE(review): YAML.load may instantiate arbitrary objects on older Psych
  # versions; only run this on trusted locale files.
  source_gdpr = YAML.load(File.read(origin_file)).fetch(origin_lang).fetch('gdpr')

  languages.each do |lang|
    puts "Translating to #{lang}..."
    target_file = File.join(origin_path, "#{lang}.yml")
    # An empty target file loads as a falsy value; start from an empty hash then.
    lang_yml = YAML.load(File.read(target_file)) || {}

    # Build a new hash rather than mutating the source strings, so the source
    # only has to be parsed once for all languages.
    translated = source_gdpr.each_with_object({}) do |(key, text), result|
      stripped = text.is_a?(String) ? text.strip : ''
      result[key] =
        if stripped.empty? || stripped.start_with?('http')
          text
        else
          # URL-decode only the translated snippet: decoding the whole dumped
          # file would also rewrite "+" and "%XX" in unrelated keys.
          # Presumably this undoes percent-escaping of attribute values -- TODO confirm.
          CGI.unescape(process_html(lang, stripped))
        end
    end

    (lang_yml[lang] ||= {})['gdpr'] = translated
    File.open(target_file, 'w') do |f|
      # Strip only the leading YAML document marker, not "---" lines inside values.
      f.puts(YAML.dump(lang_yml).sub(/\A---\n/, ''))
    end
    sleep 5 # pause between languages, presumably to throttle requests
  end
rescue StandardError => e
  # StandardError only, so Ctrl-C and exit still terminate the script.
  warn "Translation failed: #{e.class}: #{e.message}"
end
# Extracts the source file path and the target languages from command-line
# arguments. Both `-name=value` and `--name=value` spellings are accepted.
#
# @param args [Array<String>] raw arguments, e.g. ARGV
# @return [Array(String, Array<String>)] the path ('' when absent) and the
#   language codes ([] when absent)
def parse_cli_args(args)
  en_path = ''
  languages = []
  args.each do |arg|
    case arg
    when /\A--?enFilePath=(.*)\z/m
      en_path = Regexp.last_match(1)
    when /\A--?destLangs=(.*)\z/m
      # Drop blanks so "tr,,ms" does not produce an empty language code.
      languages = Regexp.last_match(1).split(',').map(&:strip).reject(&:empty?)
    end
  end
  [en_path, languages]
end

en_path, languages = parse_cli_args(ARGV)
if en_path.empty? || languages.empty?
  # Say why nothing happens instead of exiting silently.
  warn 'Usage: ruby translate.rb -enFilePath=PATH -destLangs=LANG[,LANG...]'
else
  translate_file(en_path, languages)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment