Skip to content

Instantly share code, notes, and snippets.

@CodeMonkeySteve
Created March 2, 2015 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CodeMonkeySteve/0aaf78c3ca996cfaf5c6 to your computer and use it in GitHub Desktop.
Save CodeMonkeySteve/0aaf78c3ca996cfaf5c6 to your computer and use it in GitHub Desktop.
XLIFF Translator
#!/usr/bin/env ruby
require 'bing_translator'
require 'active_support'
require 'active_support/core_ext'
# Reopens BingTranslator (the class loaded by `require 'bing_translator'`) to
# add a YAML-backed translation cache and a batch #translate that goes through
# the TranslateArray HTTP endpoint.
class BingTranslator
  # Cache layout: { target_language => { source_string => translated_string } }.
  @@cache = {}
  @@cache_path = File.join(__dir__, '.xliff_trans_cache')
  if File.exist?(@@cache_path)
    # An empty or truncated cache file does not load as a Hash; keep the empty
    # default in that case instead of crashing later on `@@cache[...]`.
    loaded = YAML.load_file(@@cache_path)
    @@cache = loaded if loaded.is_a?(Hash)
  end

  # Writes the in-memory cache to the cache file as YAML.
  # The block form of File.open guarantees the handle is flushed and closed.
  def self.save_cache
    File.open(@@cache_path, 'w') { |file| YAML.dump(@@cache, file) }
  end

  TRANSLATE_ARRAY_URI = URI.parse('http://api.microsofttranslator.com/V2/Http.svc/TranslateArray').freeze

  # Keep the original single-string implementation reachable before #translate
  # is redefined below.
  alias_method :translate_one, :translate

  # Translates any number of strings, consulting and filling the cache.
  #
  # @param strings [Array<String>] source strings
  # @param to [#to_s] target language code (also used as the cache key)
  # @param from [#to_s] source language code, 'en' by default
  # @return [Hash{String => String, nil}] each input string mapped to its
  #   translation, or nil when no translation could be obtained
  def translate(*strings, to:, from: 'en')
    # Same shape as the non-empty result (a Hash), so callers can index it.
    return {} if strings.empty?

    cache = (@@cache[to.to_s] ||= {})
    pending = strings.reject { |str| cache.include?(str) }.uniq

    # Strings containing a tag-like token (e.g. "<name>") are sent one at a
    # time through the original method with an HTML content type, and are
    # removed from the batch whether or not a translation came back.
    pending.reject! do |str|
      next false unless str.match(/<[\w-]+>/)

      res = translate_one(str, from: from, to: to, contentType: 'text/html')
      cache[str] = res if res.present?
      true
    end

    if pending.present?
      params = { texts: pending, from: CGI.escape(from.to_s), to: CGI.escape(to.to_s) }
      doc = Nokogiri.XML(array_result(TRANSLATE_ARRAY_URI, params).body)
      res = doc.xpath('xmlns:ArrayOfTranslateArrayResponse/xmlns:TranslateArrayResponse/xmlns:TranslatedText').map(&:text)
      # Results are matched to sources by position; blank results are not cached.
      pending.each.with_index { |src, idx| cache[src] = res[idx] if res[idx].present? }
    end

    strings.map { |str| [str, cache[str]] }.to_h
  end

  # POSTs a TranslateArrayRequest XML document to +uri+.
  #
  # @param uri [URI] endpoint to post to
  # @param params [Hash] reads :from, :to and :texts (an Array of strings)
  # @param headers [Hash] accepted but not used by this method
  # @return [Net::HTTPResponse] the response when its status code is 200
  # @raise [Exception] with the text content of the response body otherwise
  def array_result(uri, params = {}, headers = {})
    # Not defined in this file; presumably supplied by the gem and responsible
    # for populating @access_token, which is read below.
    get_access_token

    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == "https"
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE if @skip_ssl_verify
    end

    # A trailing underscore is Nokogiri::XML::Builder's way of emitting a tag
    # whose name would otherwise clash with an existing method.
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.TranslateArrayRequest do
        xml.AppId
        xml.From_ params[:from]
        xml.Options_ do
          # Element name corrected from "CotentType" to the schema's "ContentType".
          xml.ContentType({xmlns: 'http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2'}, 'text/html')
        end
        xml.Texts do
          params[:texts].each do |text|
            xml.string({xmlns: 'http://schemas.microsoft.com/2003/10/Serialization/Arrays'}, text)
          end
        end
        xml.To_ params[:to]
      end
    end

    request = Net::HTTP::Post.new(uri.path)
    request.add_field 'Content-Type', 'application/xml'
    request.add_field 'Authorization', "Bearer #{@access_token['access_token']}"
    request.body = builder.to_xml(indent: 2)

    results = http.request(request)
    return results if results.code.to_i == 200

    html = Nokogiri::HTML(results.body)
    # NOTE(review): `Exception` is resolved from inside this class. Confirm the
    # gem defines BingTranslator::Exception; otherwise this raises ::Exception,
    # which a bare `rescue` does not catch.
    raise Exception, html.xpath("//text()").remove.map(&:to_s).join(' ')
  end
end
# Builds the output path for the translated copy of +path+: "-auto" is inserted
# before the first dot of the file NAME (directories are left untouched), e.g.
# "./dir.v2/app.xlf" -> "./dir.v2/app-auto.xlf". Names without a usable
# extension get "-auto" appended, so the result never equals the input path.
def auto_output_path(path)
  base = File.basename(path)
  renamed = base.sub(/\A([^.]+)(\..+)\z/, '\1-auto\2')
  renamed = "#{base}-auto" if renamed == base
  File.join(File.dirname(path), renamed)
end

# NOTE(review): credentials should not live in source control. They can be
# overridden through the environment; the literals remain only as fallbacks so
# existing invocations keep working.
translator = BingTranslator.new(
  ENV.fetch('BING_CLIENT_ID', 'org-finagle-sandbox'),
  ENV.fetch('BING_CLIENT_SECRET', 'YdNt0qFIsWChIp5uDlcfLq4td8Xn9MCXvEcBGsXEe/I=')
)

begin
  # Each command-line argument is the path of an XLIFF file to translate.
  ARGV.each do |path|
    xliff = Nokogiri.XML(File.read(path))
    file = xliff.at_xpath('/xmlns:xliff/xmlns:file')
    raise "Missing file tag" unless file

    datatype = file['datatype']
    raise "Unsupported datatype #{datatype}" unless datatype == 'plaintext'

    src_locale, tgt_locale = file['source-language'], file['target-language']

    # Maps every <target> node to the text of the <source> in the same unit.
    # Units without a <target> are skipped.
    # NOTE(review): the leading '//' searches the whole document, so units of
    # every <file> element are translated using the first <file>'s languages.
    trans = {}
    file.xpath('//xmlns:trans-unit').each do |unit|
      src = unit.at_xpath('xmlns:source')
      raise "Missing source translation" unless src.present?
      next unless (tgt = unit.at_xpath('xmlns:target'))

      trans[tgt] = src.text
    end

    res = translator.translate(*trans.values, to: tgt_locale, from: src_locale)
    trans.each do |tgt, src_text|
      translated = res[src_text]
      # No translation available: keep whatever the target already contains
      # rather than blanking it.
      next if translated.nil?

      tgt.content = translated
    end

    File.open(auto_output_path(path), 'w') { |f| f.write(xliff.to_xml(indent: 2)) }
  end
ensure
  # Persist translations fetched so far even when a later file fails.
  BingTranslator.save_cache
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment