Skip to content

Instantly share code, notes, and snippets.

@clicube
Created October 21, 2012 18:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clicube/3927974 to your computer and use it in GitHub Desktop.
Save clicube/3927974 to your computer and use it in GitHub Desktop.
seibenize string
# coding: utf-8
require 'uri'
require 'open-uri'
require 'rexml/document'
require 'json'
require_relative 'romkan'
# romkan.rb (expected to supply the String#to_roma / String#to_kana calls used below) comes from http://0xcc.net/ruby-romkan/
# Application id sent as the `appid` query parameter of the Yahoo! request.
# The value below is a placeholder; replace it with a real id before running.
# Frozen so the shared constant cannot be mutated in place by accident.
YAHOO_APP_ID = 'YOUR YAHOO APPLICATION ID'.freeze
# Per-character probability that a romaji character is inserted in front of
# the previously emitted character, i.e. that two adjacent characters swap.
INVERSION_RATE = 0.03
# Per-character probability that a romaji character with an entry in TYPO_MAP
# is replaced by one of the characters listed for it.
TYPO_RATE = 0.03
# Substitution table for simulated typos: each lowercase letter maps to the
# letters that may be typed in its place (they look like the neighbouring keys
# of a QWERTY layout). Characters without an entry are never replaced.
#
# The hash, every candidate list and every candidate string are frozen, so a
# caller that receives a value from here cannot corrupt the shared table.
TYPO_MAP = {
  'a' => %w[q w s z],
  'b' => %w[v g h n],
  'c' => %w[x d f v],
  'd' => %w[s e r f c x],
  'e' => %w[w r d s],
  'f' => %w[d r t g v c],
  'g' => %w[f t y h b v],
  'h' => %w[g y u j n b],
  'i' => %w[u o k j],
  'j' => %w[h u i k m n],
  'k' => %w[j i o l m],
  'l' => %w[k o p],
  'm' => %w[n j k],
  'n' => %w[b h j m],
  'o' => %w[i p l k],
  'p' => %w[o l],
  'q' => %w[w a],
  'r' => %w[e t f d],
  's' => %w[a w e d x z],
  't' => %w[r y g f],
  'u' => %w[y i j h],
  'v' => %w[c f g b],
  'w' => %w[q e s a],
  'x' => %w[z s d c],
  'y' => %w[t u h g],
  'z' => %w[a s x]
}.each_value { |candidates| candidates.each(&:freeze).freeze }.freeze
class String
  # Returns a randomly "mistyped" rendition of this (Japanese) string.
  #
  # Steps:
  # 1. Request the reading of every word from the Yahoo! MAService endpoint
  #    and concatenate the <reading> elements of the XML response.
  # 2. Convert that reading with String#to_roma (not defined in this file;
  #    expected to come from romkan).
  # 3. Walk the romaji: with probability INVERSION_RATE a character is
  #    inserted in front of the previously emitted one (adjacent swap).
  # 4. Walk it again: with probability TYPO_RATE a character that has an
  #    entry in TYPO_MAP is replaced by a random candidate from that entry.
  # 5. Convert back with String#to_kana and send the result to Google's
  #    transliterate endpoint, keeping the first candidate of each segment.
  #
  # The output is non-deterministic (Kernel#rand, Array#sample). The method
  # performs two HTTP requests; network and parse errors are not rescued here
  # and propagate to the caller.
  #
  # NOTE(review): both endpoints are third-party services addressed over
  # plain http; confirm they are still served at these URLs.
  #
  # @return [String] the converted text; '' when the receiver is empty or the
  #   reading service returned no reading
  def to_seibe
    # Nothing to convert: skip both network round-trips.
    return '' if empty?

    # encode_www_form escapes every reserved character (including '&' and
    # '=' inside the sentence); URI.encode did not and is gone in Ruby 3.
    yahoo_query = URI.encode_www_form(
      'appid' => YAHOO_APP_ID,
      'sentence' => self,
      'results' => 'ma',
      'response' => 'reading'
    )
    # Block form of URI.open closes the underlying IO after reading.
    yahoo_xml = URI.open("http://jlp.yahooapis.jp/MAService/V1/parse?#{yahoo_query}", &:read)

    kana = ''.dup
    REXML::Document.new(yahoo_xml).elements.each('ResultSet/ma_result/word_list/word/reading') do |reading|
      # An empty <reading/> has nil text; to_s keeps the append safe.
      kana << reading.text.to_s
    end
    return '' if kana.empty?

    roma = kana.to_roma

    # Adjacent-character swaps. The first character can never be swapped (it
    # has no predecessor), so no random number is drawn for it.
    swapped = roma.each_char.each_with_object(''.dup) do |char, out|
      if out.empty? || rand > INVERSION_RATE
        out << char
      else
        # Zero-length slice assignment at -1 inserts before the last character.
        out[-1, 0] = char
      end
    end

    # Neighbouring-key substitutions.
    seibed_roma = swapped.each_char.map do |char|
      candidates = TYPO_MAP[char]
      rand < TYPO_RATE && candidates ? candidates.sample : char
    end.join

    google_query = URI.encode_www_form(
      'langpair' => 'ja-Hira|ja',
      'text' => seibed_roma.to_kana
    )
    google_json = URI.open("http://www.google.com/transliterate?#{google_query}", &:read)

    # Each response element is indexed as [first, [candidate, ...]]; take the
    # first candidate and fall back to the element's first item if the
    # candidate list is missing or empty.
    JSON.parse(google_json).map do |segment, candidates|
      Array(candidates).first || segment
    end.join
  end
end
# Script entry point: when this file is executed directly (rather than loaded
# by another program), convert every command-line argument with
# String#to_seibe and print each result on its own line.
if __FILE__ == $PROGRAM_NAME
  ARGV.each { |sentence| puts sentence.to_seibe }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment