// g100pon #32: character encoding detection
import com.ibm.icu.text.CharsetDetector
// Fetches ICU4J through Grape so that CharsetDetector is available to this script.
@Grab(group='com.ibm.icu', module='icu4j', version='4.0.1')
def detector = new CharsetDetector()

// Directory holding the sample files. The files must be prepared beforehand.
def sampleDir = '/works/test'

// Sample file name -> charset name the detector is expected to report.
def expectedCharsets = [
    'iso2022-jp.txt': 'ISO-2022-JP',
    'utf8.txt'      : 'UTF-8',
    'eucjp.txt'     : 'EUC-JP',
    'sjis.txt'      : 'Shift_JIS',
    // Note: CP932 ends up being recognized as Shift_JIS; adding
    // platform-dependent characters to the file does not change that.
    'cp932.txt'     : 'Shift_JIS',
]

expectedCharsets.each { fileName, expected ->
    def bytes = new File(sampleDir, fileName).getBytes()
    // detect() may return null when no charset matches; use safe navigation so
    // that case shows up as a readable assertion failure instead of an NPE.
    def detected = detector.setText(bytes).detect()?.getName()
    assert detected == expected : "${fileName}: expected ${expected} but detected ${detected}"
}
// (GitHub page footer text that was captured along with the script; not part of the code.)