Skip to content

Instantly share code, notes, and snippets.

@koshigoe
Created July 27, 2008 13:54
Show Gist options
  • Save koshigoe/2774 to your computer and use it in GitHub Desktop.
Save koshigoe/2774 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# The magic comment above declares the source encoding for Ruby 1.9+, which
# needs it (or a UTF-8 default) to parse the kanji literals in this file.
# $KCODE is the Ruby 1.8 way of selecting multibyte handling; newer
# interpreters ignore it (and some print a warning), so only set it there.
$KCODE = 'utf8' if RUBY_VERSION < '1.9'
# Converts kanji numerals (kansuji) to integers.
#
# The methods below rely on String's interface (+split+, regexp matching), so
# the module is meant to be mixed into String.
#
# * http://ja.wikipedia.org/wiki/%E6%BC%A2%E6%95%B0%E5%AD%97
# * http://www.akatsukinishisu.net/kanji/kansuji.html
module Kansuji
  # Lookup tables and precompiled patterns shared by the conversion methods.
  module Const
    # Digit characters as they appear after normalization.
    NUMBERS = {
      '0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
      '5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
    }.freeze

    # Multipliers used inside one four-digit group (tens, hundreds, thousands).
    UNITS = {
      '十' => 10,
      '百' => 10 ** 2,
      '千' => 10 ** 3,
    }.freeze

    # Multipliers that separate four-digit groups (powers of 10000).
    PLACES = {
      '万' => 10000,
      '億' => 10000 ** 2,
      '兆' => 10000 ** 3,
      '京' => 10000 ** 4,
      '垓' => 10000 ** 5,
    }.freeze

    # Maps kanji digits and variant/formal characters onto the canonical
    # characters used by NUMBERS, UNITS and PLACES. A few characters expand to
    # two characters (e.g. '廿' becomes '2十').
    NORMALIZE_TABLE = {
      '零' => '0', '〇' => '0',
      '一' => '1', '壱' => '1', '壹' => '1',
      '二' => '2', '弐' => '2', '貳' => '2', '貮' => '2',
      '三' => '3', '参' => '3', '參' => '3', '弎' => '3',
      '四' => '4', '肆' => '4',
      '五' => '5', '伍' => '5',
      '六' => '6', '陸' => '6',
      '七' => '7', '漆' => '7', '柒' => '7', '質' => '7',
      '八' => '8', '捌' => '8',
      '九' => '9', '玖' => '9',
      '拾' => '十',
      '廿' => '2十',
      '卅' => '3十', '丗' => '3十',
      '卌' => '4十',
      '佰' => '百',
      '仟' => '千', '阡' => '千',
      '萬' => '万',
    }.freeze

    # Every character that may appear in a convertible string.
    CHARACTERS = [
      NUMBERS,
      UNITS,
      PLACES,
      NORMALIZE_TABLE
    ].map { |item| item.keys }.flatten.uniq.freeze

    # Matches strings made exclusively of CHARACTERS (at least one).
    CHARACTERS_RE = /\A[#{CHARACTERS.join}]+\z/

    # "<prefix><place char><rest>": the prefix may be empty so that a bare
    # place character (e.g. '万') is still split off instead of being left
    # behind as an unconvertible digit string.
    PLACE_SPLIT_RE = /\A([^#{PLACES.keys.join}]*)([#{PLACES.keys.join}])(.*)\z/m

    # "<prefix><unit char><rest>", same shape as PLACE_SPLIT_RE.
    UNIT_SPLIT_RE = /\A([^#{UNITS.keys.join}]*)([#{UNITS.keys.join}])(.*)\z/m
  end

  # Folds a string made only of NUMBERS keys into its decimal value.
  # String#to_i is deliberately not used here: if to_i is overridden to
  # delegate to kansuji_to_i, calling it would recurse.
  def self.digits_to_i(digits)
    digits.split(//).inject(0) { |value, chr| value * 10 + Const::NUMBERS[chr] }
  end

  # Returns a copy of the receiver with every character found in
  # NORMALIZE_TABLE replaced by its canonical form; other characters are kept.
  def kansuji_normalize
    split(//).map { |chr| Kansuji::Const::NORMALIZE_TABLE[chr] || chr }.join
  end

  # Returns true when the receiver is non-empty and consists solely of
  # characters listed in CHARACTERS, false otherwise.
  def kansuji_valid?
    !(Kansuji::Const::CHARACTERS_RE =~ self).nil?
  end

  # Splits a normalized string at the place characters (万, 億, ...).
  #
  # Returns an array of { :source => String, :base => Integer } hashes in
  # reading order; a trailing group without a place character gets base 1.
  # A place character with nothing in front of it is read as "1 x place",
  # mirroring how kansuji_split_by_unit treats a bare unit character.
  def kansuji_split_by_place
    rest = self
    parts = []
    while (m = Kansuji::Const::PLACE_SPLIT_RE.match(rest))
      group = m[1].empty? ? '1' : m[1]
      parts << { :source => group, :base => Kansuji::Const::PLACES[m[2]] }
      rest = m[3]
    end
    parts << { :source => rest, :base => 1 } unless rest.empty?
    parts
  end

  # Splits one place group at the unit characters (十, 百, 千).
  #
  # Returns an array of { :source => String, :base => Integer } hashes; a unit
  # character without leading digits gets the source '1', and trailing digits
  # get base 1.
  def kansuji_split_by_unit
    rest = self
    parts = []
    while (m = Kansuji::Const::UNIT_SPLIT_RE.match(rest))
      digits = m[1].empty? ? '1' : m[1]
      parts << { :source => digits, :base => Kansuji::Const::UNITS[m[2]] }
      rest = m[3]
    end
    parts << { :source => rest, :base => 1 } unless rest.empty?
    parts
  end

  # Converts the receiver to an Integer.
  #
  # Returns 0 when the receiver is not a valid kansuji string (see
  # kansuji_valid?). Otherwise the string is normalized, split into place
  # groups and unit parts, and the weighted digit values are summed.
  def kansuji_to_i
    return 0 unless kansuji_valid?

    kansuji_normalize.kansuji_split_by_place.inject(0) do |total, place_part|
      group_value = place_part[:source].kansuji_split_by_unit.inject(0) do |sum, unit_part|
        sum + Kansuji.digits_to_i(unit_part[:source]) * unit_part[:base]
      end
      total + group_value * place_part[:base]
    end
  end
end
# Extends String so that #to_i also understands kanji numerals.
class String
  include Kansuji

  # Keep a handle on the built-in conversion. The guard matters when this file
  # is loaded more than once: re-aliasing would point original_to_i at the
  # override below and make every call recurse forever.
  alias_method :original_to_i, :to_i unless method_defined?(:original_to_i)

  # Converts the string to an Integer.
  #
  # base - radix passed through to the built-in conversion (default 10).
  #
  # Kanji-numeral parsing is decimal only, so it is used just for base 10;
  # any other base (and any string that is not a valid kansuji string) is
  # handled by the built-in String#to_i, which keeps e.g. '101'.to_i(2) == 5.
  def to_i(base = 10)
    if base == 10 && kansuji_valid?
      kansuji_to_i
    else
      original_to_i(base)
    end
  end
end
# Self-tests: run only when this file is executed directly.
if File.basename($0) == File.basename(__FILE__)
  require 'test/unit'

  class TC_Kansuji < Test::Unit::TestCase
    # Every variant character must map to its canonical form.
    def test_normalize
      testcases = {
        '零〇' => '00',
        '一壱壹' => '111',
        '二弐貳貮' => '2222',
        '三参參弎' => '3333',
        '四肆' => '44',
        '五伍' => '55',
        '六陸' => '66',
        '七漆柒質' => '7777',
        '八捌' => '88',
        '九玖' => '99',
        '拾' => '十',
        '廿' => '2十',
        '卅丗' => '3十3十',
        '卌' => '4十',
        '佰' => '百',
        '仟阡' => '千千',
        '萬' => '万',
      }
      testcases.each do |str, expected|
        actual = str.kansuji_normalize
        assert_equal(expected, actual, actual)
      end
    end

    # A string is valid only if every character is a known numeral character.
    def test_validation
      testcases = {
        '零〇一壱壹二弐貳貮三参參弎四肆五伍六陸' \
        '七漆柒質八捌九玖拾廿卅丗卌佰仟阡萬' => true,
        # The trailing character must lie outside every table; an ASCII digit
        # would not do because the digits are keys of Kansuji::Const::NUMBERS.
        '零〇一壱壹二弐貳貮三参參弎四肆五伍六陸' \
        '七漆柒質八捌九玖拾廿卅丗卌佰仟阡萬x' => false,
      }
      testcases.each do |str, expected|
        assert_equal(expected, str.kansuji_valid?, str)
      end
    end

    # Place characters split the string into groups with their multiplier.
    def test_kansuji_split_by_place
      testcases = {
        '123垓456京789兆123億456万7890' => [
          { :source => '123', :base => Kansuji::Const::PLACES['垓'] },
          { :source => '456', :base => Kansuji::Const::PLACES['京'] },
          { :source => '789', :base => Kansuji::Const::PLACES['兆'] },
          { :source => '123', :base => Kansuji::Const::PLACES['億'] },
          { :source => '456', :base => Kansuji::Const::PLACES['万'] },
          { :source => '7890', :base => 1 },
        ]
      }
      testcases.each do |source, expected|
        assert_equal(expected, source.kansuji_split_by_place)
      end
    end

    # Unit characters split a group into digit runs with their multiplier.
    def test_kansuji_split_by_unit
      testcases = {
        '2千3百4十5' => [
          { :source => '2', :base => Kansuji::Const::UNITS['千'] },
          { :source => '3', :base => Kansuji::Const::UNITS['百'] },
          { :source => '4', :base => Kansuji::Const::UNITS['十'] },
          { :source => '5', :base => 1 },
        ]
      }
      testcases.each do |source, expected|
        assert_equal(expected, source.kansuji_split_by_unit)
      end
    end

    # End-to-end conversion, through both kansuji_to_i and String#to_i.
    def test_to_i
      testcases = {
        '1234567890' => 1234567890,
        '一二三四五六七八九零' => 1234567890,
        '千百十' => 1110,
        '一千一百一十零億三千' => 111000003000,
        '拾' => 10,
        '廿' => 20,
        '卅' => 30,
        '卌' => 40,
        '佰' => 100,
        '仟' => 1000,
        '1萬' => 10000,
        '1234567890千' => 1234567890000,
        '1234567890万' => 12345678900000,
        '1234567890億' => 123456789000000000,
        '1億1千万五十三' => 110000053,
      }
      testcases.each do |str, expected|
        assert_equal(expected, str.kansuji_to_i, str)
        assert_equal(expected, str.to_i, str)
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment