Skip to content

Instantly share code, notes, and snippets.

@tauty
Last active July 3, 2022 23:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tauty/ed457b64dc6c4ef7346d1ce2716e9312 to your computer and use it in GitHub Desktop.
Save tauty/ed457b64dc6c4ef7346d1ce2716e9312 to your computer and use it in GitHub Desktop.
/*
* Copyright 2022 tetsuo.ohta[at]gmail.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Converts a number written with kanji numerals into a JavaScript number.
 *
 * The notations below may be mixed freely within one string:
 *  - positional digits: kanji digits, formal "daiji" and old-style variants,
 *    look-alikes of the kanji zero, and half-/full-width Arabic digits
 *    (e.g. "一三" -> 13, "50万" -> 500000);
 *  - in-group units (ten, hundred, thousand) and group units (10^4 ... 10^20);
 *    a unit with nothing written before it counts once ("十" -> 10);
 *  - an optional leading minus sign, a decimal point, and thousands
 *    separators (which are skipped).
 *
 * @param {string} kanji - Text to convert. Non-string values are converted
 *     with String() first.
 * @returns {number} The parsed value; an empty string yields 0.
 * @throws {Error} If an unsupported character is found, or if one digit run
 *     contains a second decimal point.
 */
var kanjiToNum = function () {
    var digit_table = {
        "〇": 0, "一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "七": 7, "八": 8, "九": 9,
        // Characters likely to be typed by mistake for the kanji zero
        // (ASCII "O", full-width "O" U+FF2F, and two circle symbols).
        "O": 0, "\uFF2F": 0, "○": 0, "◯": 0,
        // Daiji (formal numerals).
        "零": 0, "壱": 1, "弐": 2, "参": 3, "肆": 4, "伍": 5, "陸": 6, "漆": 7, "質": 7, "捌": 8, "玖": 9,
        // Old-style, popular and archaic variants.
        "壹": 1, "弌": 1, "貳": 2, "貮": 2, "弍": 2, "參": 3, "弎": 3, "柒": 7, "亖": 4
    };
    // Arabic digits: half-width (U+0030..U+0039) and full-width (U+FF10..U+FF19).
    // Built from code points so the full-width keys cannot be silently
    // normalised into duplicates of the half-width ones.
    for (var d = 0; d <= 9; d++) {
        digit_table[String.fromCharCode(0x30 + d)] = d;
        digit_table[String.fromCharCode(0xFF10 + d)] = d;
    }

    // Units that multiply the digit run written directly before them.
    var small_decimals = {"十": 10, "百": 100, "千": 1000, "拾": 10, "什": 10, "佰": 100, "陌": 100, "仟": 1000, "阡": 1000};
    // Units that multiply the whole group accumulated since the previous group unit.
    // All of these powers of ten are exactly representable as doubles.
    var large_decimals = {"万": 1e4, "萬": 1e4, "億": 1e8, "兆": 1e12, "京": 1e16, "垓": 1e20};

    // \uFF0D, \uFF0E and \uFF0C are the full-width minus, full stop and comma.
    var minusReg = /[-−ー―‐\uFF0D]/, pointReg = /[.\uFF0E]/, ignoreReg = /[,\uFF0C]/;

    // Own-property lookup; returns undefined for characters not in the table.
    function lookup(table, ch) {
        return Object.prototype.hasOwnProperty.call(table, ch) ? table[ch] : undefined;
    }

    // Value of a pending digit run, or `fallback` when no digit was written.
    // The run is converted in one step from its decimal text so the result is
    // the correctly rounded double (no accumulated error from adding tenths,
    // hundredths, ... one at a time).
    function numeralValue(intDigits, fracDigits, fallback) {
        if (intDigits === "" && fracDigits === "") return fallback;
        return Number((intDigits || "0") + "." + (fracDigits || "0"));
    }

    return function (kanji) {
        var text = String(kanji);
        var sign = minusReg.test(text.charAt(0)) ? -1 : 1;
        var total = 0;             // sum of the groups already closed by a group unit
        var subtotal = 0;          // in-group unit terms of the current group
        var intDigits = "";        // pending digit run, integer part (ASCII digits)
        var fracDigits = "";       // pending digit run, fractional part
        var inFraction = false;    // a decimal point was seen in the pending run
        var groupHasUnit = false;  // the current group already contains an in-group unit

        for (var i = (sign === -1 ? 1 : 0); i < text.length; i++) {
            var ch = text.charAt(i);
            var value;
            if ((value = lookup(digit_table, ch)) !== undefined) {
                if (inFraction) fracDigits += value;
                else intDigits += value;
            } else if ((value = lookup(small_decimals, ch)) !== undefined) {
                // Only a unit with no digits before it gets the implicit 1;
                // an explicitly written zero stays zero.
                subtotal += numeralValue(intDigits, fracDigits, 1) * value;
                intDigits = fracDigits = "";
                inFraction = false;
                groupHasUnit = true;
            } else if ((value = lookup(large_decimals, ch)) !== undefined) {
                // A completely empty group counts as 1 (e.g. the leading group unit in "億千万").
                var group = groupHasUnit
                    ? subtotal + numeralValue(intDigits, fracDigits, 0)
                    : numeralValue(intDigits, fracDigits, 1);
                total += group * value;
                subtotal = 0;
                intDigits = fracDigits = "";
                inFraction = groupHasUnit = false;
            } else if (pointReg.test(ch)) {
                if (inFraction) throw new Error("Unexpected decimal point, '" + ch + "' is at index " + i + " of " + text);
                inFraction = true;
            } else if (!ignoreReg.test(ch)) {
                throw new Error("Unknown charactor, '" + ch + "' is at index " + i + " of " + text);
            }
        }
        return sign * (total + subtotal + numeralValue(intDigits, fracDigits, 0));
    };
}();
/** Test
// 漢数字表記
kanjiToNum("零"); // 0
kanjiToNum("一"); // 1
kanjiToNum("七"); // 7
kanjiToNum("十"); // 10
kanjiToNum("十三"); // 13
kanjiToNum("四十三"); // 43
kanjiToNum("三千二百一"); // 3201
kanjiToNum("三十万千五百九十二"); // 301592
kanjiToNum("七十五億三十万千五百九十二"); // 7500301592
kanjiToNum("百三十兆千七百五億三十万千五百九十二"); // 130170500301592
kanjiToNum("億千万"); // 110000000
// アラビア数字風表記
kanjiToNum("一〇"); // 10
kanjiToNum("一O"); // 10
kanjiToNum("一O"); // 10
kanjiToNum("一○"); // 10
kanjiToNum("一◯"); // 10
kanjiToNum("一三"); // 13
kanjiToNum("四三"); // 43
kanjiToNum("三二0一"); // 3201
kanjiToNum("三〇一五九二"); // 301592
kanjiToNum("〇.五"); // 0.5
kanjiToNum("一.三五"); // 1.35
kanjiToNum("一〇.五〇三"); // 10.503
kanjiToNum("一二三,四五六.七八"); // 123456.78
// Mixed
kanjiToNum("五〇万"); // 500000
kanjiToNum("50万"); // 500000
kanjiToNum("ー50万"); // -500000
kanjiToNum("50万"); // 500000
kanjiToNum("-50万"); // -500000
kanjiToNum("一三億六三万一三八"); // 1300630138
kanjiToNum("2億4千万"); // 240000000
kanjiToNum("2.4億"); // 240000000
kanjiToNum("-2.4億"); // -240000000
kanjiToNum("1.03百万18"); // 1030018 (普通はこんな表記しないが、一応変換できる)
kanjiToNum("1億2263万8千") // 122638000
kanjiToNum("1,073,115百万"); // 1073115000000
kanjiToNum("301,592.135"); // 301592.135
// 大字(だいじ)
kanjiToNum("壱萬伍阡"); // 15000
kanjiToNum("参拾萬肆阡伍陌漆"); // 304507
kanjiToNum("参〇肆伍〇柒"); // 304507
// Error
kanjiToNum("3億8214方1125"); // "Unknown charactor, '方' is at index 6 of 3億8214方1125"
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment