Created
July 27, 2008 13:54
-
-
Save koshigoe/2774 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# The source contains UTF-8 kanji literals; the magic comment above declares
# that for Ruby 1.9, while $KCODE does the same job on Ruby 1.8 only. On 1.9+
# $KCODE has no effect (and assigning it warns on 1.9-2.x), so it is skipped.
$KCODE = 'utf8' if RUBY_VERSION < '1.9'
# Converts kanji numerals (kansuji) to Integers.
#
# * http://ja.wikipedia.org/wiki/%E6%BC%A2%E6%95%B0%E5%AD%97
# * http://www.akatsukinishisu.net/kanji/kansuji.html
#
# The methods assume +self+ is a String. kansuji_to_i calls the split methods
# on freshly built strings, so the module must be included into String (not
# merely extended onto a single object) for kansuji_to_i to work.
module Kansuji
  module Const
    # ASCII digit => value. Every numeral kanji is normalized to one of these.
    NUMBERS = {
      '0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
      '5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
    }.freeze

    # Multipliers used inside one four-digit group.
    UNITS = {
      '十' => 10,
      '百' => 10 ** 2,
      '千' => 10 ** 3,
    }.freeze

    # Multipliers that separate four-digit groups (powers of 10,000).
    PLACES = {
      '万' => 10000,
      '億' => 10000 ** 2,
      '兆' => 10000 ** 3,
      '京' => 10000 ** 4,
      '垓' => 10000 ** 5,
    }.freeze

    # Variant character => canonical spelling (ASCII digit, unit or place).
    NORMALIZE_TABLE = {
      '零' => '0', '〇' => '0',
      '一' => '1', '壱' => '1', '壹' => '1',
      '二' => '2', '弐' => '2', '貳' => '2', '貮' => '2',
      '三' => '3', '参' => '3', '參' => '3', '弎' => '3',
      '四' => '4', '肆' => '4',
      '五' => '5', '伍' => '5',
      '六' => '6', '陸' => '6',
      '七' => '7', '漆' => '7', '柒' => '7', '質' => '7',
      '八' => '8', '捌' => '8',
      '九' => '9', '玖' => '9',
      '拾' => '十',
      '廿' => '2十',
      '卅' => '3十', '丗' => '3十',
      '卌' => '4十',
      '佰' => '百',
      '仟' => '千', '阡' => '千',
      '萬' => '万',
    }.freeze

    # Every single character accepted by kansuji_valid?.
    CHARACTERS = [
      NUMBERS,
      UNITS,
      PLACES,
      NORMALIZE_TABLE
    ].map { |table| table.keys }.flatten.uniq.freeze

    CHARACTERS_RE = /\A[#{Regexp.escape(CHARACTERS.join)}]+\z/

    # "<coefficient><marker><rest>" splitters, built once instead of on every
    # loop iteration. The coefficient may be empty; it then counts as 1.
    PLACE_SPLIT_RE = /([^#{PLACES.keys.join}]*)([#{PLACES.keys.join}])(.*)/
    UNIT_SPLIT_RE = /([^#{UNITS.keys.join}]*)([#{UNITS.keys.join}])(.*)/
  end

  # Returns a copy of the string with every variant numeral replaced by its
  # canonical form from NORMALIZE_TABLE; other characters are kept as they are.
  def kansuji_normalize
    split(//).map { |chr| Kansuji::Const::NORMALIZE_TABLE[chr] || chr }.join
  end

  # Returns true when the string is non-empty and consists solely of
  # characters listed in Const::CHARACTERS, false otherwise.
  def kansuji_valid?
    !(Kansuji::Const::CHARACTERS_RE =~ self).nil?
  end

  # Splits a normalized string at the place markers (万, 億, ...).
  #
  # Returns an Array of { :source => String, :base => Integer } hashes in the
  # order they appear; a trailing run without marker gets :base => 1. A marker
  # with nothing in front of it (e.g. "万") gets :source => '1', mirroring
  # kansuji_split_by_unit, so the marker never leaks into a digit run.
  def kansuji_split_by_place
    kansuji_split_with(Kansuji::Const::PLACE_SPLIT_RE, Kansuji::Const::PLACES)
  end

  # Splits a normalized four-digit group at the unit markers (十, 百, 千).
  #
  # Returns an Array of { :source => String, :base => Integer } hashes; a unit
  # without a leading coefficient (e.g. "十") gets :source => '1', and a
  # trailing run without unit gets :base => 1.
  def kansuji_split_by_unit
    kansuji_split_with(Kansuji::Const::UNIT_SPLIT_RE, Kansuji::Const::UNITS)
  end

  # Converts the string to an Integer.
  #
  # Returns 0 when the string is not kansuji_valid?. Otherwise the string is
  # normalized, split by place and then by unit, and the parts are summed as
  # coefficient * unit base * place base.
  def kansuji_to_i
    return 0 unless kansuji_valid?

    kansuji_normalize.kansuji_split_by_place.inject(0) do |total, place_part|
      group = place_part[:source].kansuji_split_by_unit.inject(0) do |sum, unit_part|
        sum + kansuji_digits_value(unit_part[:source]) * unit_part[:base]
      end
      total + group * place_part[:base]
    end
  end

  private

  # Repeatedly peels "<coefficient><marker>" off the front of the string and
  # looks the marker up in +bases+; shared by both public split methods.
  def kansuji_split_with(splitter, bases)
    rest = self
    parts = []
    while (found = splitter.match(rest))
      coefficient = found[1].empty? ? '1' : found[1]
      parts << { :source => coefficient, :base => bases[found[2]] }
      rest = found[3]
    end
    parts << { :source => rest, :base => 1 } unless rest.empty?
    parts
  end

  # Reads a run of ASCII digits as a base-10 Integer. String#to_i is not used
  # here because the file redefines it in terms of kansuji_to_i.
  def kansuji_digits_value(digits)
    digits.split(//).inject(0) do |value, digit|
      value * 10 + Kansuji::Const::NUMBERS.fetch(digit)
    end
  end
end
# Reopens String so that #to_i also understands kanji numerals.
class String
  include Kansuji

  # Keep the built-in conversion reachable. The guard matters when this file
  # is loaded more than once: re-aliasing would point original_to_i at the
  # already-patched #to_i and make it call itself forever.
  alias_method :original_to_i, :to_i unless method_defined?(:original_to_i)

  # Converts the string to an Integer.
  #
  # base - radix, as for the built-in String#to_i (default 10).
  #
  # Kanji numerals are decimal by definition, so the kansuji conversion is
  # used only for base 10 and only when the whole string is kansuji_valid?.
  # Every other call is handed to the built-in implementation, so calls such
  # as '10'.to_i(2) keep honouring the requested base.
  def to_i(base = 10)
    if base == 10 && kansuji_valid?
      kansuji_to_i
    else
      original_to_i(base)
    end
  end
end
# Self-test: runs only when this file is executed directly, not when required.
if File.basename($0) == File.basename(__FILE__)
  require 'test/unit'

  class TC_Kansuji < Test::Unit::TestCase
    # Every variant spelling maps onto its canonical form.
    def test_normalize
      testcases = {
        '零〇' => '00',
        '一壱壹' => '111',
        '二弐貳貮' => '2222',
        '三参參弎' => '3333',
        '四肆' => '44',
        '五伍' => '55',
        '六陸' => '66',
        '七漆柒質' => '7777',
        '八捌' => '88',
        '九玖' => '99',
        '拾' => '十',
        '廿' => '2十',
        '卅丗' => '3十3十',
        '卌' => '4十',
        '佰' => '百',
        '仟阡' => '千千',
        '萬' => '万',
      }
      testcases.each do |str, expected|
        assert_equal(expected, str.kansuji_normalize, str)
      end
    end

    # A string is valid only while every character is a known numeral.
    def test_validation
      every_variant = '零〇一壱壹二弐貳貮三参參弎四肆五伍六陸' \
                      '七漆柒質八捌九玖拾廿卅丗卌佰仟阡萬'
      testcases = {
        every_variant => true,
        # ASCII digits are accepted (they are keys of NUMBERS), so the
        # negative case has to use a character that is in none of the tables.
        every_variant + 'x' => false,
      }
      testcases.each do |str, expected|
        assert_equal(expected, str.kansuji_valid?, str)
      end
    end

    def test_kansuji_split_by_place
      testcases = {
        '123垓456京789兆123億456万7890' => [
          { :source => '123', :base => Kansuji::Const::PLACES['垓'] },
          { :source => '456', :base => Kansuji::Const::PLACES['京'] },
          { :source => '789', :base => Kansuji::Const::PLACES['兆'] },
          { :source => '123', :base => Kansuji::Const::PLACES['億'] },
          { :source => '456', :base => Kansuji::Const::PLACES['万'] },
          { :source => '7890', :base => 1 },
        ]
      }
      testcases.each do |source, expected|
        assert_equal(expected, source.kansuji_split_by_place)
      end
    end

    def test_kansuji_split_by_unit
      testcases = {
        '2千3百4十5' => [
          { :source => '2', :base => Kansuji::Const::UNITS['千'] },
          { :source => '3', :base => Kansuji::Const::UNITS['百'] },
          { :source => '4', :base => Kansuji::Const::UNITS['十'] },
          { :source => '5', :base => 1 },
        ]
      }
      testcases.each do |source, expected|
        assert_equal(expected, source.kansuji_split_by_unit)
      end
    end

    # kansuji_to_i and the patched String#to_i must agree on every input.
    def test_to_i
      testcases = {
        '1234567890' => 1234567890,
        '一二三四五六七八九零' => 1234567890,
        '千百十' => 1110,
        '一千一百一十零億三千' => 111000003000,
        '拾' => 10,
        '廿' => 20,
        '卅' => 30,
        '卌' => 40,
        '佰' => 100,
        '仟' => 1000,
        '1萬' => 10000,
        '1234567890千' => 1234567890000,
        '1234567890万' => 12345678900000,
        '1234567890億' => 123456789000000000,
        '1億1千万五十三' => 110000053,
      }
      testcases.each do |str, expected|
        assert_equal(expected, str.kansuji_to_i, str)
        assert_equal(expected, str.to_i, str)
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment