// Gist nyarla/d88917bf19b65143f9fc4107e20ba3df, created February 8, 2017 00:44.
// A reverse port to modern JavaScript of the optimized TinySegmenter implementation written in Julia.
// Note: this file contains bidirectional Unicode text that may be interpreted or compiled
// differently than it appears below. To review it, open the file in an editor that reveals
// hidden Unicode characters.
/* TinySegmenter.optmized.js
 * =========================
 *
 * * A reverse port to modern JavaScript of the optimized TinySegmenter implementation written in Julia.
 *
 * LICENSE
 * =======
 * (c) 2008 Taku Kudo <taku@chasen.org>
 * (c) 2015 Michiaki Ariga a.k.a chezou <chezou@gmail.com>
 * (c) 2017 Naoki OKAMURA a.k.a nyarla <nyarla@thotep.net>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * * Neither the name of the <ORGANIZATION> nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * */
"use strict"; | |
var BIAS = -332; | |
// Score weights read by tokenize() with the key c2 + c3 (character classes
// of window positions 2 and 3).
const BC1 = new Map([
  ["HH", 6],
  ["II", 2461],
  ["KH", 406],
  ["OH", -1378],
]);
// Score weights read by tokenize() with the key c3 + c4 (character classes
// on either side of the candidate boundary).
const BC2 = new Map([
  ["AA", -3267],
  ["AI", 2744],
  ["AN", -878],
  ["HH", -4070],
  ["HM", -1711],
  ["HN", 4012],
  ["HO", 3761],
  ["IA", 1327],
  ["IH", -1184],
  ["II", -1332],
  ["IK", 1721],
  ["IO", 5492],
  ["KI", 3831],
  ["KK", -8741],
  ["MH", -3132],
  ["MK", 3334],
  ["OO", -2920],
]);
// Score weights read by tokenize() with the key c4 + c5 (character classes
// of window positions 4 and 5).
const BC3 = new Map([
  ["HH", 996],
  ["HI", 626],
  ["HK", -721],
  ["HN", -1307],
  ["HO", -836],
  ["IH", -301],
  ["KK", 2762],
  ["MK", 1079],
  ["MM", 4034],
  ["OA", -1652],
  ["OH", 266],
]);
// Score weights read by tokenize() with the key p1 + p2 (two earlier boundary
// decisions: 'B' boundary, 'O' no boundary, 'U' before the start of the text).
const BP1 = new Map([
  ["BB", 295],
  ["OB", 304],
  ["OO", -125],
  ["UB", 352],
]);
// Score weights read by tokenize() with the key p2 + p3 (the two most recent
// boundary decisions).
const BP2 = new Map([
  ["BO", 60],
  ["OO", -1762],
]);
// Score weights read by tokenize() with the key p2 + c2 + c3.
const BQ1 = new Map([
  ["BHH", 1150],
  ["BHM", 1521],
  ["BII", -1158],
  ["BIM", 886],
  ["BMH", 1208],
  ["BNH", 449],
  ["BOH", -91],
  ["BOO", -2597],
  ["OHI", 451],
  ["OIH", -296],
  ["OKA", 1851],
  ["OKH", -1020],
  ["OKK", 904],
  ["OOO", 2965],
]);
// Score weights read by tokenize() with the key p2 + c3 + c4.
const BQ2 = new Map([
  ["BHH", 118],
  ["BHI", -1159],
  ["BHM", 466],
  ["BIH", -919],
  ["BKK", -1720],
  ["BKO", 864],
  ["OHH", -1139],
  ["OHM", -181],
  ["OIH", 153],
  ["UHI", -1146],
]);
// Score weights read by tokenize() with the key p3 + c2 + c3.
const BQ3 = new Map([
  ["BHH", -792],
  ["BHI", 2664],
  ["BII", -299],
  ["BKI", 419],
  ["BMH", 937],
  ["BMM", 8335],
  ["BNN", 998],
  ["BOH", 775],
  ["OHH", 2174],
  ["OHM", 439],
  ["OII", 280],
  ["OKH", 1798],
  ["OKI", -793],
  ["OKO", -2242],
  ["OMH", -2402],
  ["OOO", 11699],
]);
// Score weights read by tokenize() with the key p3 + c3 + c4.
const BQ4 = new Map([
  ["BHH", -3895],
  ["BIH", 3761],
  ["BII", -4654],
  ["BIK", 1348],
  ["BKK", -1806],
  ["BMI", -3385],
  ["BOO", -12396],
  ["OAH", 926],
  ["OHH", 266],
  ["OHK", -2036],
  ["ONN", -973],
]);
// Score weights read by tokenize() with the key w2 + w3 (the two code units
// left of the candidate boundary; 'B1' stands in before the start of the text).
// NOTE(review): the last three entries repeat earlier keys with identical
// weights; presumably they were fullwidth/halfwidth key variants before the
// text was width-normalised - confirm against the upstream table.
const BW1 = new Map([
  [",と", 660],
  [",同", 727],
  ["B1あ", 1404],
  ["B1同", 542],
  ["、と", 660],
  ["、同", 727],
  ["」と", 1682],
  ["あっ", 1505],
  ["いう", 1743],
  ["いっ", -2055],
  ["いる", 672],
  ["うし", -4817],
  ["うん", 665],
  ["から", 3472],
  ["がら", 600],
  ["こう", -790],
  ["こと", 2083],
  ["こん", -1262],
  ["さら", -4143],
  ["さん", 4573],
  ["した", 2641],
  ["して", 1104],
  ["すで", -3399],
  ["そこ", 1977],
  ["それ", -871],
  ["たち", 1122],
  ["ため", 601],
  ["った", 3463],
  ["つい", -802],
  ["てい", 805],
  ["てき", 1249],
  ["でき", 1127],
  ["です", 3445],
  ["では", 844],
  ["とい", -4915],
  ["とみ", 1922],
  ["どこ", 3887],
  ["ない", 5713],
  ["なっ", 3015],
  ["など", 7379],
  ["なん", -1113],
  ["にし", 2468],
  ["には", 1498],
  ["にも", 1671],
  ["に対", -912],
  ["の一", -501],
  ["の中", 741],
  ["ませ", 2448],
  ["まで", 1711],
  ["まま", 2600],
  ["まる", -2155],
  ["やむ", -1947],
  ["よっ", -2565],
  ["れた", 2369],
  ["れで", -913],
  ["をし", 1860],
  ["を見", 731],
  ["亡く", -1886],
  ["京都", 2558],
  ["取り", -2784],
  ["大き", -2604],
  ["大阪", 1497],
  ["平方", -2314],
  ["引き", -1336],
  ["日本", -195],
  ["本当", -2423],
  ["毎日", -2113],
  ["目指", -724],
  ["B1あ", 1404],
  ["B1同", 542],
  ["」と", 1682],
]);
// Score weights read by tokenize() with the key w3 + w4 (the code units on
// either side of the candidate boundary).
// NOTE(review): the final "11" entry repeats an earlier key with the same
// weight; presumably it was a fullwidth variant before the text was
// width-normalised - confirm against the upstream table.
const BW2 = new Map([
  ["..", -11822],
  ["11", -669],
  ["――", -5730],
  ["−−", -13175],
  ["いう", -1609],
  ["うか", 2490],
  ["かし", -1350],
  ["かも", -602],
  ["から", -7194],
  ["かれ", 4612],
  ["がい", 853],
  ["がら", -3198],
  ["きた", 1941],
  ["くな", -1597],
  ["こと", -8392],
  ["この", -4193],
  ["させ", 4533],
  ["され", 13168],
  ["さん", -3977],
  ["しい", -1819],
  ["しか", -545],
  ["した", 5078],
  ["して", 972],
  ["しな", 939],
  ["その", -3744],
  ["たい", -1253],
  ["たた", -662],
  ["ただ", -3857],
  ["たち", -786],
  ["たと", 1224],
  ["たは", -939],
  ["った", 4589],
  ["って", 1647],
  ["っと", -2094],
  ["てい", 6144],
  ["てき", 3640],
  ["てく", 2551],
  ["ては", -3110],
  ["ても", -3065],
  ["でい", 2666],
  ["でき", -1528],
  ["でし", -3828],
  ["です", -4761],
  ["でも", -4203],
  ["とい", 1890],
  ["とこ", -1746],
  ["とと", -2279],
  ["との", 720],
  ["とみ", 5168],
  ["とも", -3941],
  ["ない", -2488],
  ["なが", -1313],
  ["など", -6509],
  ["なの", 2614],
  ["なん", 3099],
  ["にお", -1615],
  ["にし", 2748],
  ["にな", 2454],
  ["によ", -7236],
  ["に対", -14943],
  ["に従", -4688],
  ["に関", -11388],
  ["のか", 2093],
  ["ので", -7059],
  ["のに", -6041],
  ["のの", -6125],
  ["はい", 1073],
  ["はが", -1033],
  ["はず", -2532],
  ["ばれ", 1813],
  ["まし", -1316],
  ["まで", -6621],
  ["まれ", 5409],
  ["めて", -3153],
  ["もい", 2230],
  ["もの", -10713],
  ["らか", -944],
  ["らし", -1611],
  ["らに", -1897],
  ["りし", 651],
  ["りま", 1620],
  ["れた", 4270],
  ["れて", 849],
  ["れば", 4114],
  ["ろう", 6067],
  ["われ", 7901],
  ["を通", -11877],
  ["んだ", 728],
  ["んな", -4115],
  ["一人", 602],
  ["一方", -1375],
  ["一日", 970],
  ["一部", -1051],
  ["上が", -4479],
  ["会社", -1116],
  ["出て", 2163],
  ["分の", -7758],
  ["同党", 970],
  ["同日", -913],
  ["大阪", -2471],
  ["委員", -1250],
  ["少な", -1050],
  ["年度", -8669],
  ["年間", -1626],
  ["府県", -2363],
  ["手権", -1982],
  ["新聞", -4066],
  ["日新", -722],
  ["日本", -7068],
  ["日米", 3372],
  ["曜日", -601],
  ["朝鮮", -2355],
  ["本人", -2697],
  ["東京", -1543],
  ["然と", -1384],
  ["社会", -1276],
  ["立て", -990],
  ["第に", -1612],
  ["米国", -4268],
  ["11", -669],
]);
// Score weights read by tokenize() with the key w4 + w5 (the two code units
// right of the candidate boundary).
// NOTE(review): the final "カ月" entry repeats an earlier key with the same
// weight; presumably it was a halfwidth variant before the text was
// width-normalised - confirm against the upstream table.
const BW3 = new Map([
  ["あた", -2194],
  ["あり", 719],
  ["ある", 3846],
  ["い.", -1185],
  ["い。", -1185],
  ["いい", 5308],
  ["いえ", 2079],
  ["いく", 3029],
  ["いた", 2056],
  ["いっ", 1883],
  ["いる", 5600],
  ["いわ", 1527],
  ["うち", 1117],
  ["うと", 4798],
  ["えと", 1454],
  ["か.", 2857],
  ["か。", 2857],
  ["かけ", -743],
  ["かっ", -4098],
  ["かに", -669],
  ["から", 6520],
  ["かり", -2670],
  ["が,", 1816],
  ["が、", 1816],
  ["がき", -4855],
  ["がけ", -1127],
  ["がっ", -913],
  ["がら", -4977],
  ["がり", -2064],
  ["きた", 1645],
  ["けど", 1374],
  ["こと", 7397],
  ["この", 1542],
  ["ころ", -2757],
  ["さい", -714],
  ["さを", 976],
  ["し,", 1557],
  ["し、", 1557],
  ["しい", -3714],
  ["した", 3562],
  ["して", 1449],
  ["しな", 2608],
  ["しま", 1200],
  ["す.", -1310],
  ["す。", -1310],
  ["する", 6521],
  ["ず,", 3426],
  ["ず、", 3426],
  ["ずに", 841],
  ["そう", 428],
  ["た.", 8875],
  ["た。", 8875],
  ["たい", -594],
  ["たの", 812],
  ["たり", -1183],
  ["たる", -853],
  ["だ.", 4098],
  ["だ。", 4098],
  ["だっ", 1004],
  ["った", -4748],
  ["って", 300],
  ["てい", 6240],
  ["てお", 855],
  ["ても", 302],
  ["です", 1437],
  ["でに", -1482],
  ["では", 2295],
  ["とう", -1387],
  ["とし", 2266],
  ["との", 541],
  ["とも", -3543],
  ["どう", 4664],
  ["ない", 1796],
  ["なく", -903],
  ["など", 2135],
  ["に,", -1021],
  ["に、", -1021],
  ["にし", 1771],
  ["にな", 1906],
  ["には", 2644],
  ["の,", -724],
  ["の、", -724],
  ["の子", -1000],
  ["は,", 1337],
  ["は、", 1337],
  ["べき", 2181],
  ["まし", 1113],
  ["ます", 6943],
  ["まっ", -1549],
  ["まで", 6154],
  ["まれ", -793],
  ["らし", 1479],
  ["られ", 6820],
  ["るる", 3818],
  ["れ,", 854],
  ["れ、", 854],
  ["れた", 1850],
  ["れて", 1375],
  ["れば", -3246],
  ["れる", 1091],
  ["われ", -605],
  ["んだ", 606],
  ["んで", 798],
  ["カ月", 990],
  ["会議", 860],
  ["入り", 1232],
  ["大会", 2217],
  ["始め", 1681],
  ["市", 965],
  ["新聞", -5055],
  ["日,", 974],
  ["日、", 974],
  ["社会", 2024],
  ["カ月", 990],
]);
// Score weights read by tokenize() with the key c1 + c2 + c3.
const TC1 = new Map([
  ["AAA", 1093],
  ["HHH", 1029],
  ["HHM", 580],
  ["HII", 998],
  ["HOH", -390],
  ["HOM", -331],
  ["IHI", 1169],
  ["IOH", -142],
  ["IOI", -1015],
  ["IOM", 467],
  ["MMH", 187],
  ["OOI", -1832],
]);
// Score weights read by tokenize() with the key c2 + c3 + c4.
const TC2 = new Map([
  ["HHO", 2088],
  ["HII", -1023],
  ["HMM", -1154],
  ["IHI", -1965],
  ["KKH", 703],
  ["OII", -2649],
]);
// Score weights read by tokenize() with the key c3 + c4 + c5.
const TC3 = new Map([
  ["AAA", -294],
  ["HHH", 346],
  ["HHI", -341],
  ["HII", -1088],
  ["HIK", 731],
  ["HOH", -1486],
  ["IHH", 128],
  ["IHI", -3041],
  ["IHO", -1935],
  ["IIH", -825],
  ["IIM", -1035],
  ["IOI", -542],
  ["KHH", -1216],
  ["KKA", 491],
  ["KKH", -1217],
  ["KOK", -1009],
  ["MHH", -2694],
  ["MHM", -457],
  ["MHO", 123],
  ["MMH", -471],
  ["NNH", -1689],
  ["NNO", 662],
  ["OHO", -3393],
]);
// Score weights read by tokenize() with the key c4 + c5 + c6.
const TC4 = new Map([
  ["HHH", -203],
  ["HHI", 1344],
  ["HHK", 365],
  ["HHM", -122],
  ["HHN", 182],
  ["HHO", 669],
  ["HIH", 804],
  ["HII", 679],
  ["HOH", 446],
  ["IHH", 695],
  ["IHO", -2324],
  ["IIH", 321],
  ["III", 1497],
  ["IIO", 656],
  ["IOO", 54],
  ["KAK", 4845],
  ["KKA", 3386],
  ["KKK", 3065],
  ["MHH", -405],
  ["MHI", 201],
  ["MMH", -241],
  ["MMM", 661],
  ["MOM", 841],
]);
// Score weights read by tokenize() with the key p2 + c1 + c2 + c3.
const TQ1 = new Map([
  ["BHHH", -227],
  ["BHHI", 316],
  ["BHIH", -132],
  ["BIHH", 60],
  ["BIII", 1595],
  ["BNHH", -744],
  ["BOHH", 225],
  ["BOOO", -908],
  ["OAKK", 482],
  ["OHHH", 281],
  ["OHIH", 249],
  ["OIHI", 200],
  ["OIIH", -68],
]);
// Score weights read by tokenize() with the key p2 + c2 + c3 + c4.
const TQ2 = new Map([
  ["BIHH", -1401],
  ["BIII", -1033],
  ["BKAK", -543],
  ["BOOO", -5591],
]);
// Score weights read by tokenize() with the key p3 + c1 + c2 + c3.
const TQ3 = new Map([
  ["BHHH", 478],
  ["BHHM", -1073],
  ["BHIH", 222],
  ["BHII", -504],
  ["BIIH", -116],
  ["BIII", -105],
  ["BMHI", -863],
  ["BMHM", -464],
  ["BOMH", 620],
  ["OHHH", 346],
  ["OHHI", 1729],
  ["OHII", 997],
  ["OHMH", 481],
  ["OIHH", 623],
  ["OIIH", 1344],
  ["OKAK", 2792],
  ["OKHH", 587],
  ["OKKA", 679],
  ["OOHH", 110],
  ["OOII", -685],
]);
// Score weights read by tokenize() with the key p3 + c2 + c3 + c4.
const TQ4 = new Map([
  ["BHHH", -721],
  ["BHHM", -3604],
  ["BHII", -966],
  ["BIIH", -607],
  ["BIII", -2181],
  ["OAAA", -2763],
  ["OAKK", 180],
  ["OHHH", -294],
  ["OHHI", 2446],
  ["OHHO", 480],
  ["OHIH", -1573],
  ["OIHH", 1935],
  ["OIHI", -493],
  ["OIIH", 626],
  ["OIII", -4007],
  ["OKAK", -8156],
]);
// Score weights read by tokenize() with the key w1 + w2 + w3 (the three code
// units left of the candidate boundary).
const TW1 = new Map([
  ["につい", -4681],
  ["東京都", 2026],
]);
// Score weights read by tokenize() with the key w2 + w3 + w4.
const TW2 = new Map([
  ["ある程", -2049],
  ["いった", -1256],
  ["ころが", -2434],
  ["しょう", 3873],
  ["その後", -4430],
  ["だって", -1049],
  ["ていた", 1833],
  ["として", -4657],
  ["ともに", -4517],
  ["もので", 1882],
  ["一気に", -792],
  ["初めて", -1512],
  ["同時に", -8097],
  ["大きな", -1255],
  ["対して", -2721],
  ["社会党", -3216],
]);
// Score weights read by tokenize() with the key w3 + w4 + w5.
const TW3 = new Map([
  ["いただ", -1734],
  ["してい", 1314],
  ["として", -4314],
  ["につい", -5483],
  ["にとっ", -5989],
  ["に当た", -6247],
  ["ので,", -727],
  ["ので、", -727],
  ["のもの", -600],
  ["れから", -3752],
  ["十二月", -2287],
]);
// Score weights read by tokenize() with the key w4 + w5 + w6 (the three code
// units right of the candidate boundary).
const TW4 = new Map([
  ["いう.", 8576],
  ["いう。", 8576],
  ["からな", -2348],
  ["してい", 2958],
  ["たが,", 1516],
  ["たが、", 1516],
  ["ている", 1538],
  ["という", 1349],
  ["ました", 5543],
  ["ません", 1097],
  ["ようと", -4258],
  ["よると", 5865],
]);
// Score weights read by tokenize() with the key c1.
const UC1 = new Map([
  ["A", 484],
  ["K", 93],
  ["M", 645],
  ["O", -505],
]);
// Score weights read by tokenize() with the key c2.
const UC2 = new Map([
  ["A", 819],
  ["H", 1059],
  ["I", 409],
  ["M", 3987],
  ["N", 5775],
  ["O", 646],
]);
// Weights keyed by the character class c3. tokenize() does not read this
// table; it hard-codes the same two values in its `c3 === 'A'` / `c3 === 'I'`
// branches, so keep both places in sync when changing a weight.
const UC3 = new Map([
  ["A", -1370],
  ["I", 2311],
]);
// Score weights read by tokenize() with the key c4.
const UC4 = new Map([
  ["A", -2643],
  ["H", 1809],
  ["I", -1032],
  ["K", -3450],
  ["M", 3565],
  ["N", 3876],
  ["O", 6646],
]);
// Score weights read by tokenize() with the key c5.
const UC5 = new Map([
  ["H", 313],
  ["I", -1238],
  ["K", -799],
  ["M", 539],
  ["O", -831],
]);
// Score weights read by tokenize() with the key c6.
const UC6 = new Map([
  ["H", -506],
  ["I", -253],
  ["K", 87],
  ["M", 247],
  ["O", -387],
]);
// Weight keyed by the boundary decision p1. tokenize() does not read this
// table; it hard-codes the same value in its `p1 === 'O'` branch.
const UP1 = new Map([
  ["O", -214],
]);
// Weights keyed by the boundary decision p2. tokenize() does not read this
// table; it hard-codes the same values in its `p2 === 'B'` / `p2 === 'O'`
// branches.
const UP2 = new Map([
  ["B", 69],
  ["O", 935],
]);
// Weight keyed by the boundary decision p3. tokenize() does not read this
// table; it hard-codes the same value in its `p3 === 'B'` branch.
const UP3 = new Map([
  ["B", 189],
]);
// Score weights read by tokenize() with the key p1 + c1.
const UQ1 = new Map([
  ["BH", 21],
  ["BI", -12],
  ["BK", -99],
  ["BN", 142],
  ["BO", -56],
  ["OH", -95],
  ["OI", 477],
  ["OK", 410],
  ["OO", -2422],
]);
// Score weights read by tokenize() with the key p2 + c2.
const UQ2 = new Map([
  ["BH", 216],
  ["BI", 113],
  ["OK", 1759],
]);
// Score weights read by tokenize() with the key p3 + c3.
const UQ3 = new Map([
  ["BA", -479],
  ["BH", 42],
  ["BI", 1913],
  ["BK", -7198],
  ["BM", 3160],
  ["BN", 6427],
  ["BO", 14761],
  ["OI", -827],
  ["ON", -3212],
]);
// Score weights read by tokenize() with the key w1 (the code unit three
// positions left of the candidate boundary).
// NOTE(review): the last two entries repeat earlier keys with identical
// weights; presumably they were halfwidth key variants before the text was
// width-normalised - confirm against the upstream table.
const UW1 = new Map([
  [",", 156],
  ["、", 156],
  ["「", -463],
  ["あ", -941],
  ["う", -127],
  ["が", -553],
  ["き", 121],
  ["こ", 505],
  ["で", -201],
  ["と", -547],
  ["ど", -123],
  ["に", -789],
  ["の", -185],
  ["は", -847],
  ["も", -466],
  ["や", -470],
  ["よ", 182],
  ["ら", -292],
  ["り", 208],
  ["れ", 169],
  ["を", -446],
  ["ん", -137],
  ["・", -135],
  ["主", -402],
  ["京", -268],
  ["区", -912],
  ["午", 871],
  ["国", -460],
  ["大", 561],
  ["委", 729],
  ["市", -411],
  ["日", -141],
  ["理", 361],
  ["生", -408],
  ["県", -386],
  ["都", -718],
  ["「", -463],
  ["・", -135],
]);
// Score weights read by tokenize() with the key w2 (the code unit two
// positions left of the candidate boundary).
// NOTE(review): the last six entries repeat earlier keys with identical
// weights; presumably they were halfwidth key variants before the text was
// width-normalised - confirm against the upstream table.
const UW2 = new Map([
  [",", -829],
  ["、", -829],
  ["〇", 892],
  ["「", -645],
  ["」", 3145],
  ["あ", -538],
  ["い", 505],
  ["う", 134],
  ["お", -502],
  ["か", 1454],
  ["が", -856],
  ["く", -412],
  ["こ", 1141],
  ["さ", 878],
  ["ざ", 540],
  ["し", 1529],
  ["す", -675],
  ["せ", 300],
  ["そ", -1011],
  ["た", 188],
  ["だ", 1837],
  ["つ", -949],
  ["て", -291],
  ["で", -268],
  ["と", -981],
  ["ど", 1273],
  ["な", 1063],
  ["に", -1764],
  ["の", 130],
  ["は", -409],
  ["ひ", -1273],
  ["べ", 1261],
  ["ま", 600],
  ["も", -1263],
  ["や", -402],
  ["よ", 1639],
  ["り", -579],
  ["る", -694],
  ["れ", 571],
  ["を", -2516],
  ["ん", 2095],
  ["ア", -587],
  ["カ", 306],
  ["キ", 568],
  ["ッ", 831],
  ["三", -758],
  ["不", -2150],
  ["世", -302],
  ["中", -968],
  ["主", -861],
  ["事", 492],
  ["人", -123],
  ["会", 978],
  ["保", 362],
  ["入", 548],
  ["初", -3025],
  ["副", -1566],
  ["北", -3414],
  ["区", -422],
  ["大", -1769],
  ["天", -865],
  ["太", -483],
  ["子", -1519],
  ["学", 760],
  ["実", 1023],
  ["小", -2009],
  ["市", -813],
  ["年", -1060],
  ["強", 1067],
  ["手", -1519],
  ["揺", -1033],
  ["政", 1522],
  ["文", -1355],
  ["新", -1682],
  ["日", -1815],
  ["明", -1462],
  ["最", -630],
  ["朝", -1843],
  ["本", -1650],
  ["東", -931],
  ["果", -665],
  ["次", -2378],
  ["民", -180],
  ["気", -1740],
  ["理", 752],
  ["発", 529],
  ["目", -1584],
  ["相", -242],
  ["県", -1165],
  ["立", -763],
  ["第", 810],
  ["米", 509],
  ["自", -1353],
  ["行", 838],
  ["西", -744],
  ["見", -3874],
  ["調", 1010],
  ["議", 1198],
  ["込", 3041],
  ["開", 1758],
  ["間", -1257],
  ["「", -645],
  ["」", 3145],
  ["ッ", 831],
  ["ア", -587],
  ["カ", 306],
  ["キ", 568],
]);
// Score weights read by tokenize() with the key w3 (the code unit immediately
// left of the candidate boundary).
// NOTE(review): the last twelve entries repeat earlier keys with identical
// weights; presumably they were fullwidth/halfwidth key variants before the
// text was width-normalised - confirm against the upstream table.
const UW3 = new Map([
  [",", 4889],
  ["1", -800],
  ["−", -1723],
  ["、", 4889],
  ["々", -2311],
  ["〇", 5827],
  ["」", 2670],
  ["〓", -3573],
  ["あ", -2696],
  ["い", 1006],
  ["う", 2342],
  ["え", 1983],
  ["お", -4864],
  ["か", -1163],
  ["が", 3271],
  ["く", 1004],
  ["け", 388],
  ["げ", 401],
  ["こ", -3552],
  ["ご", -3116],
  ["さ", -1058],
  ["し", -395],
  ["す", 584],
  ["せ", 3685],
  ["そ", -5228],
  ["た", 842],
  ["ち", -521],
  ["っ", -1444],
  ["つ", -1081],
  ["て", 6167],
  ["で", 2318],
  ["と", 1691],
  ["ど", -899],
  ["な", -2788],
  ["に", 2745],
  ["の", 4056],
  ["は", 4555],
  ["ひ", -2171],
  ["ふ", -1798],
  ["へ", 1199],
  ["ほ", -5516],
  ["ま", -4384],
  ["み", -120],
  ["め", 1205],
  ["も", 2323],
  ["や", -788],
  ["よ", -202],
  ["ら", 727],
  ["り", 649],
  ["る", 5905],
  ["れ", 2773],
  ["わ", -1207],
  ["を", 6620],
  ["ん", -518],
  ["ア", 551],
  ["グ", 1319],
  ["ス", 874],
  ["ッ", -1350],
  ["ト", 521],
  ["ム", 1109],
  ["ル", 1591],
  ["ロ", 2201],
  ["ン", 278],
  ["・", -3794],
  ["一", -1619],
  ["下", -1759],
  ["世", -2087],
  ["両", 3815],
  ["中", 653],
  ["主", -758],
  ["予", -1193],
  ["二", 974],
  ["人", 2742],
  ["今", 792],
  ["他", 1889],
  ["以", -1368],
  ["低", 811],
  ["何", 4265],
  ["作", -361],
  ["保", -2439],
  ["元", 4858],
  ["党", 3593],
  ["全", 1574],
  ["公", -3030],
  ["六", 755],
  ["共", -1880],
  ["円", 5807],
  ["再", 3095],
  ["分", 457],
  ["初", 2475],
  ["別", 1129],
  ["前", 2286],
  ["副", 4437],
  ["力", 365],
  ["動", -949],
  ["務", -1872],
  ["化", 1327],
  ["北", -1038],
  ["区", 4646],
  ["千", -2309],
  ["午", -783],
  ["協", -1006],
  ["口", 483],
  ["右", 1233],
  ["各", 3588],
  ["合", -241],
  ["同", 3906],
  ["和", -837],
  ["員", 4513],
  ["国", 642],
  ["型", 1389],
  ["場", 1219],
  ["外", -241],
  ["妻", 2016],
  ["学", -1356],
  ["安", -423],
  ["実", -1008],
  ["家", 1078],
  ["小", -513],
  ["少", -3102],
  ["州", 1155],
  ["市", 3197],
  ["平", -1804],
  ["年", 2416],
  ["広", -1030],
  ["府", 1605],
  ["度", 1452],
  ["建", -2352],
  ["当", -3885],
  ["得", 1905],
  ["思", -1291],
  ["性", 1822],
  ["戸", -488],
  ["指", -3973],
  ["政", -2013],
  ["教", -1479],
  ["数", 3222],
  ["文", -1489],
  ["新", 1764],
  ["日", 2099],
  ["旧", 5792],
  ["昨", -661],
  ["時", -1248],
  ["曜", -951],
  ["最", -937],
  ["月", 4125],
  ["期", 360],
  ["李", 3094],
  ["村", 364],
  ["東", -805],
  ["核", 5156],
  ["森", 2438],
  ["業", 484],
  ["氏", 2613],
  ["民", -1694],
  ["決", -1073],
  ["法", 1868],
  ["海", -495],
  ["無", 979],
  ["物", 461],
  ["特", -3850],
  ["生", -273],
  ["用", 914],
  ["町", 1215],
  ["的", 7313],
  ["直", -1835],
  ["省", 792],
  ["県", 6293],
  ["知", -1528],
  ["私", 4231],
  ["税", 401],
  ["立", -960],
  ["第", 1201],
  ["米", 7767],
  ["系", 3066],
  ["約", 3663],
  ["級", 1384],
  ["統", -4229],
  ["総", 1163],
  ["線", 1255],
  ["者", 6457],
  ["能", 725],
  ["自", -2869],
  ["英", 785],
  ["見", 1044],
  ["調", -562],
  ["財", -733],
  ["費", 1777],
  ["車", 1835],
  ["軍", 1375],
  ["込", -1504],
  ["通", -1136],
  ["選", -681],
  ["郎", 1026],
  ["郡", 4404],
  ["部", 1200],
  ["金", 2163],
  ["長", 421],
  ["開", -1432],
  ["間", 1302],
  ["関", -1282],
  ["雨", 2009],
  ["電", -1045],
  ["非", 2066],
  ["駅", 1620],
  ["1", -800],
  ["」", 2670],
  ["・", -3794],
  ["ッ", -1350],
  ["ア", 551],
  ["グ", 1319],
  ["ス", 874],
  ["ト", 521],
  ["ム", 1109],
  ["ル", 1591],
  ["ロ", 2201],
  ["ン", 278],
]);
// Score weights read by tokenize() with the key w4 (the code unit immediately
// right of the candidate boundary).
// NOTE(review): the last fourteen entries repeat earlier keys with identical
// weights; presumably they were halfwidth key variants before the text was
// width-normalised - confirm against the upstream table.
const UW4 = new Map([
  [",", 3930],
  [".", 3508],
  ["―", -4841],
  ["、", 3930],
  ["。", 3508],
  ["〇", 4999],
  ["「", 1895],
  ["」", 3798],
  ["〓", -5156],
  ["あ", 4752],
  ["い", -3435],
  ["う", -640],
  ["え", -2514],
  ["お", 2405],
  ["か", 530],
  ["が", 6006],
  ["き", -4482],
  ["ぎ", -3821],
  ["く", -3788],
  ["け", -4376],
  ["げ", -4734],
  ["こ", 2255],
  ["ご", 1979],
  ["さ", 2864],
  ["し", -843],
  ["じ", -2506],
  ["す", -731],
  ["ず", 1251],
  ["せ", 181],
  ["そ", 4091],
  ["た", 5034],
  ["だ", 5408],
  ["ち", -3654],
  ["っ", -5882],
  ["つ", -1659],
  ["て", 3994],
  ["で", 7410],
  ["と", 4547],
  ["な", 5433],
  ["に", 6499],
  ["ぬ", 1853],
  ["ね", 1413],
  ["の", 7396],
  ["は", 8578],
  ["ば", 1940],
  ["ひ", 4249],
  ["び", -4134],
  ["ふ", 1345],
  ["へ", 6665],
  ["べ", -744],
  ["ほ", 1464],
  ["ま", 1051],
  ["み", -2082],
  ["む", -882],
  ["め", -5046],
  ["も", 4169],
  ["ゃ", -2666],
  ["や", 2795],
  ["ょ", -1544],
  ["よ", 3351],
  ["ら", -2922],
  ["り", -9726],
  ["る", -14896],
  ["れ", -2613],
  ["ろ", -4570],
  ["わ", -1783],
  ["を", 13150],
  ["ん", -2352],
  ["カ", 2145],
  ["コ", 1789],
  ["セ", 1287],
  ["ッ", -724],
  ["ト", -403],
  ["メ", -1635],
  ["ラ", -881],
  ["リ", -541],
  ["ル", -856],
  ["ン", -3637],
  ["・", -4371],
  ["ー", -11870],
  ["一", -2069],
  ["中", 2210],
  ["予", 782],
  ["事", -190],
  ["井", -1768],
  ["人", 1036],
  ["以", 544],
  ["会", 950],
  ["体", -1286],
  ["作", 530],
  ["側", 4292],
  ["先", 601],
  ["党", -2006],
  ["共", -1212],
  ["内", 584],
  ["円", 788],
  ["初", 1347],
  ["前", 1623],
  ["副", 3879],
  ["力", -302],
  ["動", -740],
  ["務", -2715],
  ["化", 776],
  ["区", 4517],
  ["協", 1013],
  ["参", 1555],
  ["合", -1834],
  ["和", -681],
  ["員", -910],
  ["器", -851],
  ["回", 1500],
  ["国", -619],
  ["園", -1200],
  ["地", 866],
  ["場", -1410],
  ["塁", -2094],
  ["士", -1413],
  ["多", 1067],
  ["大", 571],
  ["子", -4802],
  ["学", -1397],
  ["定", -1057],
  ["寺", -809],
  ["小", 1910],
  ["屋", -1328],
  ["山", -1500],
  ["島", -2056],
  ["川", -2667],
  ["市", 2771],
  ["年", 374],
  ["庁", -4556],
  ["後", 456],
  ["性", 553],
  ["感", 916],
  ["所", -1566],
  ["支", 856],
  ["改", 787],
  ["政", 2182],
  ["教", 704],
  ["文", 522],
  ["方", -856],
  ["日", 1798],
  ["時", 1829],
  ["最", 845],
  ["月", -9066],
  ["木", -485],
  ["来", -442],
  ["校", -360],
  ["業", -1043],
  ["氏", 5388],
  ["民", -2716],
  ["気", -910],
  ["沢", -939],
  ["済", -543],
  ["物", -735],
  ["率", 672],
  ["球", -1267],
  ["生", -1286],
  ["産", -1101],
  ["田", -2900],
  ["町", 1826],
  ["的", 2586],
  ["目", 922],
  ["省", -3485],
  ["県", 2997],
  ["空", -867],
  ["立", -2112],
  ["第", 788],
  ["米", 2937],
  ["系", 786],
  ["約", 2171],
  ["経", 1146],
  ["統", -1169],
  ["総", 940],
  ["線", -994],
  ["署", 749],
  ["者", 2145],
  ["能", -730],
  ["般", -852],
  ["行", -792],
  ["規", 792],
  ["警", -1184],
  ["議", -244],
  ["谷", -1000],
  ["賞", 730],
  ["車", -1481],
  ["軍", 1158],
  ["輪", -1433],
  ["込", -3370],
  ["近", 929],
  ["道", -1291],
  ["選", 2596],
  ["郎", -4866],
  ["都", 1192],
  ["野", -1100],
  ["銀", -2213],
  ["長", 357],
  ["間", -2344],
  ["院", -2297],
  ["際", -2604],
  ["電", -878],
  ["領", -1659],
  ["題", -792],
  ["館", -1984],
  ["首", 1749],
  ["高", 2120],
  ["「", 1895],
  ["」", 3798],
  ["・", -4371],
  ["ッ", -724],
  ["ー", -11870],
  ["カ", 2145],
  ["コ", 1789],
  ["セ", 1287],
  ["ト", -403],
  ["メ", -1635],
  ["ラ", -881],
  ["リ", -541],
  ["ル", -856],
  ["ン", -3637],
]);
// Score weights read by tokenize() with the key w5 (the code unit two
// positions right of the candidate boundary; 'E1'/'E2' stand in past the end
// of the text).
// NOTE(review): the last six entries repeat earlier keys with identical
// weights; presumably they were fullwidth/halfwidth key variants before the
// text was width-normalised - confirm against the upstream table.
const UW5 = new Map([
  [",", 465],
  [".", -299],
  ["1", -514],
  ["E2", -32768],
  ["]", -2762],
  ["、", 465],
  ["。", -299],
  ["「", 363],
  ["あ", 1655],
  ["い", 331],
  ["う", -503],
  ["え", 1199],
  ["お", 527],
  ["か", 647],
  ["が", -421],
  ["き", 1624],
  ["ぎ", 1971],
  ["く", 312],
  ["げ", -983],
  ["さ", -1537],
  ["し", -1371],
  ["す", -852],
  ["だ", -1186],
  ["ち", 1093],
  ["っ", 52],
  ["つ", 921],
  ["て", -18],
  ["で", -850],
  ["と", -127],
  ["ど", 1682],
  ["な", -787],
  ["に", -1224],
  ["の", -635],
  ["は", -578],
  ["べ", 1001],
  ["み", 502],
  ["め", 865],
  ["ゃ", 3350],
  ["ょ", 854],
  ["り", -208],
  ["る", 429],
  ["れ", 504],
  ["わ", 419],
  ["を", -1264],
  ["ん", 327],
  ["イ", 241],
  ["ル", 451],
  ["ン", -343],
  ["中", -871],
  ["京", 722],
  ["会", -1153],
  ["党", -654],
  ["務", 3519],
  ["区", -901],
  ["告", 848],
  ["員", 2104],
  ["大", -1296],
  ["学", -548],
  ["定", 1785],
  ["嵐", -1304],
  ["市", -2991],
  ["席", 921],
  ["年", 1763],
  ["思", 872],
  ["所", -814],
  ["挙", 1618],
  ["新", -1682],
  ["日", 218],
  ["月", -4353],
  ["査", 932],
  ["格", 1356],
  ["機", -1508],
  ["氏", -1347],
  ["田", 240],
  ["町", -3912],
  ["的", -3149],
  ["相", 1319],
  ["省", -1052],
  ["県", -4003],
  ["研", -997],
  ["社", -278],
  ["空", -813],
  ["統", 1955],
  ["者", -2233],
  ["表", 663],
  ["語", -1073],
  ["議", 1219],
  ["選", -1018],
  ["郎", -368],
  ["長", 786],
  ["間", 1191],
  ["題", 2368],
  ["館", -689],
  ["1", -514],
  ["E2", -32768],
  ["「", 363],
  ["イ", 241],
  ["ル", 451],
  ["ン", -343],
]);
// Score weights read by tokenize() with the key w6 (the code unit three
// positions right of the candidate boundary; 'E1'/'E2' stand in past the end
// of the text).
// NOTE(review): the last four entries repeat earlier keys with identical
// weights; presumably they were fullwidth/halfwidth key variants before the
// text was width-normalised - confirm against the upstream table.
const UW6 = new Map([
  [",", 227],
  [".", 808],
  ["1", -270],
  ["E1", 306],
  ["、", 227],
  ["。", 808],
  ["あ", -307],
  ["う", 189],
  ["か", 241],
  ["が", -73],
  ["く", -121],
  ["こ", -200],
  ["じ", 1782],
  ["す", 383],
  ["た", -428],
  ["っ", 573],
  ["て", -1014],
  ["で", 101],
  ["と", -105],
  ["な", -253],
  ["に", -149],
  ["の", -417],
  ["は", -236],
  ["も", -206],
  ["り", 187],
  ["る", -135],
  ["を", 195],
  ["ル", -673],
  ["ン", -496],
  ["一", -277],
  ["中", 201],
  ["件", -800],
  ["会", 624],
  ["前", 302],
  ["区", 1792],
  ["員", -1212],
  ["委", 798],
  ["学", -960],
  ["市", 887],
  ["広", -695],
  ["後", 535],
  ["業", -697],
  ["相", 753],
  ["社", -507],
  ["福", 974],
  ["空", -822],
  ["者", 1811],
  ["連", 463],
  ["郎", 1082],
  ["1", -270],
  ["E1", 306],
  ["ル", -673],
  ["ン", -496],
]);
/**
 * Lookup table from a single UTF-16 code unit to its character-class code:
 * 'H' (一..龠 plus 々〆ヵヶ), 'I' (ぁ..ん), 'K' (katakana), 'A' (Latin
 * letters), 'N' (digits) and 'M' (the numerals 一二三…億兆). tokenize() treats
 * any character missing from the map as class 'O'.
 *
 * Assignment order matters: the numerals lie inside the 一..龠 range, so they
 * are written after it to end up as 'M'.
 *
 * The previous revision ran the a-z, A-Z and 0-9 loops twice and followed
 * ァ..ヴ with its own sub-range ァ..ン, so none of those extra loops added
 * anything. They are presumably width-folded copies of the fullwidth Latin,
 * fullwidth digit and halfwidth katakana ranges; those ranges are written here
 * as \u escapes so they cannot be folded again.
 * NOTE(review): the halfwidth range U+FF67..U+FF9D is inferred from the folded
 * end points 'ァ' and 'ン' - confirm against the upstream character classes.
 */
const CharMap = (() => {
  const m = new Map();

  // Assign `cls` to every code unit from `first` to `last`, inclusive.
  const fillRange = (first, last, cls) => {
    const end = last.charCodeAt(0);
    for ( let code = first.charCodeAt(0); code <= end; code++ ) {
      m.set( String.fromCharCode(code), cls );
    }
  };

  fillRange('一', '龠', 'H');
  fillRange('ぁ', 'ん', 'I');
  fillRange('ァ', 'ヴ', 'K');
  fillRange('\uFF67', '\uFF9D', 'K'); // halfwidth katakana
  fillRange('a', 'z', 'A');
  fillRange('A', 'Z', 'A');
  fillRange('\uFF41', '\uFF5A', 'A'); // fullwidth a-z
  fillRange('\uFF21', '\uFF3A', 'A'); // fullwidth A-Z
  fillRange('0', '9', 'N');
  fillRange('\uFF10', '\uFF19', 'N'); // fullwidth 0-9

  for ( const c of '一二三四五六七八九十百千万億兆' ) {
    m.set( c, 'M' );
  }
  for ( const c of '々〆ヵヶ' ) {
    m.set( c, 'H' );
  }
  return m;
})();
/**
 * Segment text into word-like pieces.
 *
 * For every gap between two adjacent UTF-16 code units a score is summed,
 * starting from BIAS, out of the weight tables; a positive score puts a
 * boundary in that gap. The features are:
 *   - w1..w6: the six code units around the gap (w3 is left of it, w4 right
 *     of it), with 'B3'/'B2'/'B1' before the start and 'E1'/'E2' past the end;
 *   - c1..c6: their character classes from CharMap ('O' when unmapped or
 *     outside the text);
 *   - p1..p3: the three previous decisions ('B' boundary, 'O' no boundary,
 *     'U' before the start).
 *
 * NOTE(review): the text is walked by UTF-16 code unit, so a boundary can in
 * principle fall between the halves of a surrogate pair.
 *
 * @param {string} text - Text to segment. String objects are unwrapped.
 * @returns {string[]} The segments in order (joined, they give the input
 *   back); an empty array for empty or non-string input.
 */
function tokenize(text) {
  // `! text instanceof String` parsed as `(!text) instanceof String`, which is
  // always false, so non-strings slipped through and threw later on. Test the
  // primitive type instead, unwrapping String objects so they keep working.
  const input = text instanceof String ? text.valueOf() : text;
  if ( typeof input !== 'string' || input === '' ) {
    return [];
  }

  const classOf = (ch) => CharMap.get(ch) || 'O';
  const weight = (table, key) => table.get(key) || 0;

  const result = [];
  const last = input.length - 1;
  let idx = 0;
  let start = 0;

  let p1 = 'U';
  let p2 = 'U';
  let p3 = 'U';

  let w1 = 'B3';
  let w2 = 'B2';
  let w3 = 'B1';

  let c1 = 'O';
  let c2 = 'O';
  let c3 = 'O';

  // The window is primed one step behind: the first loop iteration shifts it
  // so that w3 becomes input[0] and w4 becomes input[1].
  let w4 = input[0];
  let c4 = classOf(w4);
  let w5;
  let c5;
  let w6;
  let c6;
  if ( last === 0 ) {
    w5 = 'E1';
    c5 = 'O';
    w6 = 'E2';
    c6 = 'O';
  } else {
    w5 = input[1];
    c5 = classOf(w5);
    if ( last === 1 ) {
      w6 = 'E1';
      // Was `c5 = 'O'`: that overwrote the class of input[1] and left c6
      // undefined for two-character input.
      c6 = 'O';
    } else {
      w6 = input[2];
      c6 = classOf(w6);
    }
  }

  while ( idx < last ) {
    let score = BIAS;

    // Slide the window one code unit to the right.
    w1 = w2;
    w2 = w3;
    w3 = w4;
    w4 = w5;
    w5 = w6;
    c1 = c2;
    c2 = c3;
    c3 = c4;
    c4 = c5;
    c5 = c6;

    const ahead = idx + 3;
    if ( ahead <= last ) {
      w6 = input[ahead];
      c6 = classOf(w6);
    } else {
      // One past the end is 'E1', two past the end is 'E2'.
      w6 = ( idx + 2 === last ) ? 'E1' : 'E2';
      c6 = 'O';
    }

    // Weights of the UP1/UP2/UP3 tables, hard-coded here instead of looked up.
    if ( p1 === 'O' ) {
      score += -214;
    }
    if ( p2 === 'B' ) {
      score += 69;
    } else if ( p2 === 'O' ) {
      score += 935;
    }
    if ( p3 === 'B' ) {
      score += 189;
    }

    score += weight(BP1, p1 + p2);
    score += weight(BP2, p2 + p3);

    score += weight(UW1, w1);
    score += weight(UW2, w2);
    score += weight(UW3, w3);
    score += weight(UW4, w4);
    score += weight(UW5, w5);
    score += weight(UW6, w6);

    score += weight(BW1, w2 + w3);
    score += weight(BW2, w3 + w4);
    score += weight(BW3, w4 + w5);

    score += weight(TW1, w1 + w2 + w3);
    score += weight(TW2, w2 + w3 + w4);
    score += weight(TW3, w3 + w4 + w5);
    score += weight(TW4, w4 + w5 + w6);

    score += weight(UC1, c1);
    score += weight(UC2, c2);
    // Weights of the UC3 table, hard-coded here instead of looked up.
    if ( c3 === 'A' ) {
      score += -1370;
    } else if ( c3 === 'I' ) {
      score += 2311;
    }
    score += weight(UC4, c4);
    score += weight(UC5, c5);
    score += weight(UC6, c6);

    score += weight(BC1, c2 + c3);
    score += weight(BC2, c3 + c4);
    score += weight(BC3, c4 + c5);

    score += weight(TC1, c1 + c2 + c3);
    score += weight(TC2, c2 + c3 + c4);
    score += weight(TC3, c3 + c4 + c5);
    score += weight(TC4, c4 + c5 + c6);

    score += weight(UQ1, p1 + c1);
    score += weight(UQ2, p2 + c2);
    score += weight(UQ3, p3 + c3);

    score += weight(BQ1, p2 + c2 + c3);
    score += weight(BQ2, p2 + c3 + c4);
    score += weight(BQ3, p3 + c2 + c3);
    score += weight(BQ4, p3 + c3 + c4);

    score += weight(TQ1, p2 + c1 + c2 + c3);
    score += weight(TQ2, p2 + c2 + c3 + c4);
    score += weight(TQ3, p3 + c1 + c2 + c3);
    score += weight(TQ4, p3 + c2 + c3 + c4);

    let p = 'O';
    if ( score > 0 ) {
      // Boundary between input[idx] and input[idx + 1]: emit the finished segment.
      result.push( input.substring(start, idx + 1) );
      start = idx + 1;
      p = 'B';
    }

    p1 = p2;
    p2 = p3;
    p3 = p;
    idx += 1;
  }

  // Whatever follows the last boundary is the final segment.
  result.push( input.substring(start) );
  return result;
}
// CommonJS export: the module's value is the tokenize function itself.
module.exports = tokenize;
// (GitHub page footer: sign up for free or sign in to comment on this gist.)