Skip to content

Instantly share code, notes, and snippets.

@Kimtaro
Created June 15, 2012 13:30
Show Gist options
  • Save Kimtaro/2936471 to your computer and use it in GitHub Desktop.
Save Kimtaro/2936471 to your computer and use it in GitHub Desktop.
Convert kanji numerals to Arabic numerals
# Encoding: UTF-8
# Parses numbers written with kanji numerals (optionally mixed with ASCII
# digits) into Integers.
#
# Two notations are understood and may be combined:
# * positional digit runs, e.g. '二〇一二' or '24'
# * place markers (十, 百, 千, 万), e.g. '二千十二'; a marker with no digit
#   in front of it counts as one of that unit ('千' is 1000).
module KanjiToNumber
  # Place-marker characters mapped to the power of ten they stand for.
  POS_FOR_DEN = { '十' => 1, '百' => 2, '千' => 3, '万' => 4 }.freeze

  # Every accepted digit spelling (kanji, including 壱/弐/参, and ASCII)
  # mapped to the matching ASCII digit.
  NUM_FOR_NUM = {
    '壱' => '1', '一' => '1', '1' => '1',
    '弐' => '2', '二' => '2', '2' => '2',
    '参' => '3', '三' => '3', '3' => '3',
    '四' => '4', '4' => '4',
    '五' => '5', '5' => '5',
    '六' => '6', '6' => '6',
    '七' => '7', '7' => '7',
    '八' => '8', '8' => '8',
    '九' => '9', '9' => '9',
    '零' => '0', '〇' => '0', '0' => '0'
  }.freeze

  # Markers with at least this exponent (only 万 at present) multiply
  # everything accumulated since the previous such marker, not just the digit
  # run directly in front of them: '二十万' is 20 * 10_000.
  GROUP_EXPONENT = POS_FOR_DEN.fetch('万')
  private_constant :GROUP_EXPONENT

  # Converts +kanji+ by walking it once, character by character.
  #
  # Characters that are neither digits nor place markers are ignored.
  #
  #   KanjiToNumber.convert('二千十二') # => 2012
  #   KanjiToNumber.convert('24万一')   # => 240001
  #   KanjiToNumber.convert('二十万')   # => 200000
  #
  # @param kanji [String] the text to parse; it is not modified
  # @return [Integer] the parsed value, 0 when no digit or marker is present
  def self.convert(kanji)
    total = 0      # value of the groups already closed by 万
    section = 0    # value accumulated since the last 万
    pending = nil  # digit run not yet claimed by a marker (nil: no digits seen)

    kanji.each_char do |char|
      if NUM_FOR_NUM.key?(char)
        pending = (pending || 0) * 10 + NUM_FOR_NUM[char].to_i
      elsif POS_FOR_DEN.key?(char)
        total, section = apply_marker(total, section, pending, POS_FOR_DEN[char])
        pending = nil
      end
    end

    # A digit run with no marker after it is the units part.
    total + section + (pending || 0)
  end

  # Regex-based variant: building blocks for matching "digits + marker" pairs.
  DENS = %r{ [#{POS_FOR_DEN.keys.join}] }x
  NUMS = %r{ [#{NUM_FOR_NUM.keys.join}] }x
  MATCHER = %r{ (?<num> #{NUMS}+ )? (?<denom> #{DENS} ) }x

  # Converts +kanji+ like {convert}, but tokenises with MATCHER: every
  # "optional digit run + place marker" pair is consumed by the regex and
  # whatever digits are left over form the units part.
  #
  # Unlike earlier revisions this neither prints debug output nor modifies
  # the string passed in, so frozen strings are accepted.
  #
  # @param kanji [String] the text to parse; it is not modified
  # @return [Integer] the parsed value, 0 when no digit or marker is present
  def self.convert2(kanji)
    total = 0
    section = 0

    # Drop unrelated characters first so a digit run and its marker are
    # adjacent for MATCHER, mirroring how convert skips such characters.
    recognised = kanji.each_char.select do |char|
      NUM_FOR_NUM.key?(char) || POS_FOR_DEN.key?(char)
    end.join

    # Non-destructive gsub: the block's return value removes each matched
    # pair, leaving only the trailing digit run (if any).
    leftover = recognised.gsub(MATCHER) do
      match = Regexp.last_match
      multiplier = match[:num] && digits_to_i(match[:num])
      total, section = apply_marker(total, section, multiplier, POS_FOR_DEN[match[:denom]])
      ''
    end

    total + section + digits_to_i(leftover)
  end

  # Folds one place marker into the running totals.
  #
  # @param total [Integer] value of the groups already closed
  # @param section [Integer] value accumulated since the last group marker
  # @param multiplier [Integer, nil] digit run preceding the marker, nil if none
  # @param exponent [Integer] the marker's power of ten
  # @return [Array(Integer, Integer)] the updated [total, section] pair
  def self.apply_marker(total, section, multiplier, exponent)
    unit = 10**exponent

    if exponent >= GROUP_EXPONENT
      group = section + (multiplier || 0)
      # A bare group marker with nothing in front of it means one unit.
      group = 1 if section.zero? && multiplier.nil?
      [total + group * unit, 0]
    else
      [total, section + (multiplier || 1) * unit]
    end
  end
  private_class_method :apply_marker

  # Reads a run of digit characters as a base-10 Integer ('' gives 0).
  def self.digits_to_i(digits)
    digits.each_char.map { |char| NUM_FOR_NUM[char] }.join.to_i
  end
  private_class_method :digits_to_i
end
require 'minitest/autorun'
# Regression tests for KanjiToNumber.convert.
#
# Inherits from Minitest::Test: the legacy MiniTest::Unit::TestCase name is
# not available by default in current minitest releases, so the old
# superclass raised NameError before any test could run.
class KanjiToNumberTest < Minitest::Test
  # Input text => expected Integer.
  CONVERSIONS = {
    # purely positional digit runs, one per accepted zero spelling
    '二〇一二' => 2012,
    '二零一二' => 2012,
    '二0一二' => 2012,
    '三二一' => 321,
    '四三二一' => 4321,
    # place markers with explicit multipliers
    '二千十二' => 2012,
    '二千二百二十二' => 2222,
    '二千二十二' => 2022,
    '二千二' => 2002,
    '二千' => 2000,
    '二千二百二十' => 2220,
    '二千二百' => 2200,
    '二百二十二' => 222,
    # a marker with no digit in front of it counts as one unit
    '千' => 1000,
    '千二百二十二' => 1222,
    '百二十二' => 122,
    '万千百十' => 11110,
    # 万, including an ASCII multiplier
    '万一' => 10001,
    '一万一' => 10001,
    '24万一' => 240001
  }.freeze

  def test_convert
    CONVERSIONS.each do |kanji, expected|
      assert_equal expected, KanjiToNumber.convert(kanji), "convert(#{kanji.inspect})"
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment