nemo-kaz/getCodepage.groovy

## getCodepage.groovy
// 文字コード判定
// ソースコードのコードページを再帰的に判定し続ける

import com.ibm.icu.text.CharsetDetector
// Resolve ICU4J 56.1 through Grape (@Grab), then create the CharsetDetector
// instance that the file scan below reuses for every inspected file.
@Grab(group='com.ibm.icu', module='icu4j', version='56.1')
def detector = new CharsetDetector()

// UTF-8, UTF-16, UTF-32, Windows-31j
// ISO-8859-2, windows-1252, windows-1250, ISO-8859-1, Big5, UTF-16LE

// File-name suffixes (compared case-sensitively with endsWith) that select
// which files are inspected.
def sourceSuffixes = ["TXT", "RPGLE", "CLP", "PF", "LF", "DSPF", "PRTF",
                      "cpy", "txt", "java", "text", "cbl", "jcl"]
// Detected charset names for which the extra "NoKanji" check is performed.
def suspectCharsets = /ISO-8859-1|ISO-8859-2|windows-1250|windows-1252|Big5|UTF-16LE/

// Walk the current directory recursively and, for every regular file whose
// name ends with one of the suffixes above, print
//   [NoKanji ]<detected charset>\t<absolute path>
new File(".").eachFileRecurse { file ->
    if (!file.isFile() || !sourceSuffixes.any { suffix -> file.name.endsWith(suffix) }) {
        return  // returning from the closure simply moves on to the next entry
    }

    // Read the bytes straight from the File handed to the closure. Rebuilding
    // a working-directory-relative path with a backslash regex is unnecessary
    // and picks the wrong file when a name itself contains a backslash.
    def bestMatch = detector.setText(file.bytes).detect()
    // detect() returns null when no charset matches; report that instead of
    // failing with a NullPointerException.
    def fileCodepage = bestMatch?.name ?: "unknown"

    if (fileCodepage =~ suspectCharsets) {
        // The pattern is anchored at the start of the text, so "NoKanji " is
        // printed unless the text begins with a character outside the ranges
        // ' '..'~' and '｡'..'ﾟ'.
        // NOTE(review): only the first character is tested - confirm that
        // this is the intended check.
        if (!(file.text =~ /^[^ -~｡-ﾟ]/)) { print "NoKanji " }
    }
    print fileCodepage + "\t"
    // minus removes the first ".\" occurrence from the absolute path.
    println file.getAbsolutePath().minus(".\\")
}
	// 文字コード判定
	// ソースコードのコードページを再帰的に判定し続ける

	import com.ibm.icu.text.CharsetDetector
	// Resolve ICU4J 56.1 through Grape (@Grab), then create the CharsetDetector
	// instance that the file scan below reuses for every inspected file.
	@Grab(group='com.ibm.icu', module='icu4j', version='56.1')
	def detector = new CharsetDetector()

	// UTF-8, UTF-16, UTF-32, Windows-31j
	// ISO-8859-2, windows-1252, windows-1250, ISO-8859-1, Big5, UTF-16LE

	// Walk the current directory recursively and, for every regular file whose
	// name ends with one of the listed suffixes, print
	//   [NoKanji ]<detected charset>\t<absolute path>
	new File(".").eachFileRecurse { file ->
	    // Suffixes are compared case-sensitively with endsWith.
	    def wanted = file.isFile() &&
	        ["TXT", "RPGLE", "CLP", "PF", "LF", "DSPF", "PRTF",
	         "cpy", "txt", "java", "text", "cbl", "jcl"].any { suffix -> file.name.endsWith(suffix) }
	    if (wanted) {
	        // Read the bytes straight from the File handed to the closure rather
	        // than rebuilding a working-directory-relative path with a regex.
	        def bestMatch = detector.setText(file.bytes).detect()
	        // detect() returns null when no charset matches; report that instead
	        // of failing with a NullPointerException.
	        def fileCodepage = bestMatch?.name ?: "unknown"

	        // Plain '|' alternation: an escaped '\|' would only match a literal
	        // pipe character and therefore never match a detected charset name.
	        if (fileCodepage =~ (/ISO-8859-1|ISO-8859-2|windows-1250|windows-1252|Big5|UTF-16LE/)) {
	            // The pattern is anchored at the start of the text, so "NoKanji "
	            // is printed unless the text begins with a character outside the
	            // ranges ' '..'~' and '｡'..'ﾟ'.
	            // NOTE(review): only the first character is tested - confirm that
	            // this is the intended check.
	            if (!(file.text =~ /^[^ -~｡-ﾟ]/)) { print "NoKanji " }
	        }
	        print fileCodepage + "\t"
	        // minus removes the first ".\" occurrence from the absolute path.
	        println file.getAbsolutePath().minus(".\\")
	    }
	}