Skip to content

Instantly share code, notes, and snippets.

@vmus
Created July 27, 2012 14:48
Show Gist options
  • Save vmus/3188456 to your computer and use it in GitHub Desktop.
Save vmus/3188456 to your computer and use it in GitHub Desktop.
Japanese text normalization (fullwidth to halfwidth)
// Japanese text normalization (fullwidth to halfwidth)
/**
 * Converts fullwidth (zenkaku) characters in a string to their halfwidth
 * (hankaku) counterparts:
 *
 *   - fullwidth katakana (U+30A1..U+30FC)      -> halfwidth katakana (U+FF61..U+FF9F);
 *     voiced / semi-voiced kana become base kana + halfwidth sound mark
 *   - CJK punctuation and the ideographic space -> halfwidth punctuation / ASCII space
 *   - fullwidth ASCII variants (U+FF01..U+FF5E) -> ASCII (U+0021..U+007E)
 *   - fullwidth symbols (U+FFE0..U+FFE6)        -> their Latin-1 / currency originals
 *
 * Every other character (kanji, hiragana, ASCII, already-halfwidth kana, ...)
 * is returned unchanged.
 *
 * All table entries are written as \u escapes on purpose: fullwidth and
 * halfwidth forms are compatibility-equivalent, so a literal table is silently
 * turned into an identity mapping if the source text is ever Unicode-normalized.
 *
 * @param {string} s - text to normalize; must be a string (`s.replace` is called on it).
 * @returns {string} a new string with fullwidth characters replaced.
 */
function normalize(s) {
  // Fullwidth katakana listed in the order of their halfwidth counterparts,
  // which occupy the contiguous range U+FF66..U+FF9D (WO, small A..O,
  // small YA/YU/YO, small TU, prolonged sound mark, A..N).
  var KANA_IN_HALFWIDTH_ORDER =
    '\u30f2\u30a1\u30a3\u30a5\u30a7\u30a9\u30e3\u30e5\u30e7\u30c3' + // U+FF66..U+FF6F
    '\u30fc\u30a2\u30a4\u30a6\u30a8\u30aa' +                         // U+FF70..U+FF75
    '\u30ab\u30ad\u30af\u30b1\u30b3\u30b5\u30b7\u30b9\u30bb\u30bd' + // U+FF76..U+FF7F
    '\u30bf\u30c1\u30c4\u30c6\u30c8\u30ca\u30cb\u30cc\u30cd\u30ce' + // U+FF80..U+FF89
    '\u30cf\u30d2\u30d5\u30d8\u30db\u30de\u30df\u30e0\u30e1\u30e2' + // U+FF8A..U+FF93
    '\u30e4\u30e6\u30e8\u30e9\u30ea\u30eb\u30ec\u30ed\u30ef\u30f3';  // U+FF94..U+FF9D
  var HALFWIDTH_KANA_START = 0xff66;

  // Base kana of the KA, SA, TA and HA rows. In the Katakana block the voiced
  // form sits at base + 1, and for the HA row the semi-voiced form at base + 2.
  var VOICED_BASES =
    '\u30ab\u30ad\u30af\u30b1\u30b3' + // KA row
    '\u30b5\u30b7\u30b9\u30bb\u30bd' + // SA row
    '\u30bf\u30c1\u30c4\u30c6\u30c8' + // TA row
    '\u30cf\u30d2\u30d5\u30d8\u30db';  // HA row
  var SEMI_VOICED_BASES = '\u30cf\u30d2\u30d5\u30d8\u30db'; // HA row

  var HALF_DAKUTEN = '\uff9e';    // halfwidth voiced sound mark
  var HALF_HANDAKUTEN = '\uff9f'; // halfwidth semi-voiced sound mark

  // Punctuation and symbols that cannot be derived arithmetically.
  var z2h = {
    '\u3000': ' ',      // ideographic space
    '\u3001': '\uff64', // ideographic comma
    '\u3002': '\uff61', // ideographic full stop
    '\u300c': '\uff62', // left corner bracket
    '\u300d': '\uff63', // right corner bracket
    '\u30fb': '\uff65', // katakana middle dot
    '\u309b': HALF_DAKUTEN,    // standalone voiced sound mark
    '\u309c': HALF_HANDAKUTEN, // standalone semi-voiced sound mark
    '\uffe0': '\u00a2', // cent sign
    '\uffe1': '\u00a3', // pound sign
    '\uffe2': '\u00ac', // not sign
    '\uffe3': '\u00af', // macron
    '\uffe4': '\u00a6', // broken bar
    '\uffe5': '\u00a5', // yen sign
    '\uffe6': '\u20a9'  // won sign
  };

  var i;
  var base;
  for (i = 0; i < KANA_IN_HALFWIDTH_ORDER.length; i++) {
    z2h[KANA_IN_HALFWIDTH_ORDER.charAt(i)] = String.fromCharCode(HALFWIDTH_KANA_START + i);
  }
  for (i = 0; i < VOICED_BASES.length; i++) {
    base = VOICED_BASES.charAt(i);
    z2h[String.fromCharCode(base.charCodeAt(0) + 1)] = z2h[base] + HALF_DAKUTEN;
  }
  for (i = 0; i < SEMI_VOICED_BASES.length; i++) {
    base = SEMI_VOICED_BASES.charAt(i);
    z2h[String.fromCharCode(base.charCodeAt(0) + 2)] = z2h[base] + HALF_HANDAKUTEN;
  }
  // VU is the one voiced kana that is not adjacent to its base (U).
  z2h['\u30f4'] = z2h['\u30a6'] + HALF_DAKUTEN;

  // Maps a single matched character to its halfwidth form.
  function zentohan(c) {
    if (Object.prototype.hasOwnProperty.call(z2h, c)) {
      return z2h[c]; // mapped
    }
    var n = c.charCodeAt(0);
    // U+FF01..U+FF5E mirror ASCII U+0021..U+007E at a fixed offset.
    // U+FF00 is unassigned and is deliberately left alone.
    if (n >= 0xff01 && n <= 0xff5e) {
      return String.fromCharCode(n - 0xfee0);
    }
    return c; // unchanged
  }

  // CJK punctuation, the two standalone sound marks (which live in the
  // Hiragana block), the Katakana block, and Halfwidth/Fullwidth Forms.
  return s.replace(/[\u3000-\u301f\u309b\u309c\u30a0-\u30ff\uff00-\uffef]/g, zentohan);
}
// Demo input for normalize(): a sample string containing katakana, ASCII
// letters/digits/punctuation, kanji, CJK punctuation and currency/symbol characters.
// NOTE(review): the characters appear in pairs, so this presumably mixed the
// fullwidth and halfwidth form of each one originally — confirm the file's
// encoding has been preserved.
var s = 'ミムメモ ミムメモ ラリル ラリル^<23:30>=(2012年8月27日)UFJ三菱UFJ証券ホールディングス、株式会社 黒逆・三角 ETF~ETF ̄¥85$100.5%FullHD。¢¢££¬¬ ̄¯¦¦¥¥₩₩';
// Normalized form of the sample string.
var t = normalize(s);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment