Skip to content

Instantly share code, notes, and snippets.

@bdkosher
Created April 29, 2020 00:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdkosher/40acc61aa10fa4369889c49fc5c6b2e6 to your computer and use it in GitHub Desktop.
Save bdkosher/40acc61aa10fa4369889c49fc5c6b2e6 to your computer and use it in GitHub Desktop.
import groovy.transform.*
import java.nio.*
import java.nio.charset.*
// Shared UTF-8 decoder. @Field turns it into a script-level field so that
// methods declared in this script (isValidUTF8) can reference it.
@Field CharsetDecoder cs = StandardCharsets.UTF_8.newDecoder()
/**
 * Registers eachChunk(int, Closure) on InputStream through its metaClass.
 *
 * The stream is read via eachByte with a buffer of preferredChunkSize bytes and
 * the closure is called once per read with a byte[] holding exactly the bytes
 * that were read. A chunk may be shorter than preferredChunkSize.
 *
 * preferredChunkSize - buffer size, in bytes, requested for each read
 * closure            - one-argument callback receiving each chunk as a byte[]
 */
InputStream.metaClass.eachChunk << { int preferredChunkSize, Closure closure ->
    delegate.eachByte(preferredChunkSize) { buffer, bytesRead ->
        // eachByte hands over its own read buffer, which is overwritten by the
        // next read. Always pass a private copy sized to the bytes actually
        // read so that callers may safely keep a reference to any chunk.
        if (bytesRead > 0) {
            closure(Arrays.copyOf(buffer, bytesRead))
        }
    }
}
// Number of characters of surrounding text shown on each side of a reported position.
int pad = 10
// Full text of the source file; used below only to print context around a hit.
String all = new File(/C:\dev\cp\PRPS\admin\wsdl_all.txt/).text
// Appends that text to a second file.
// NOTE(review): this writes to "wsdl_all_fixed)2.txt" while the scan below reads
// "wsdl_all_fixed.txt" - confirm whether the ")2" in the name is intentional.
new File(/C:\dev\cp\PRPS\admin\wsdl_all_fixed)2.txt/) << all
// Byte offset of the chunk currently being checked.
int counter = 0
new File(/C:\dev\cp\PRPS\admin\wsdl_all_fixed.txt/).withInputStream { is ->
    // NOTE(review): each 2-byte chunk is validated on its own, so a multi-byte
    // UTF-8 sequence that straddles a chunk boundary (or is longer than two
    // bytes) is reported as illegal even when the file is well formed.
    is.eachChunk(2) { bytes ->
        if (!isValidUTF8(bytes)) {
            // Clamp the context window to the text: a negative index would wrap
            // around from the end of the string and an index past the end throws.
            // NOTE(review): counter is a byte offset but indexes characters of
            // `all`; the two only line up for single-byte text - confirm.
            int start = Math.max(0, counter - pad)
            int end = Math.min(all.length() - 1, counter + pad)
            String context = start <= end ? all[start..end] : ''
            println "${new String(bytes, 'windows-1252')} is an illegal character at index $counter: ${context}"
        }
        // Advance by the number of bytes consumed, not by one per chunk, so the
        // reported index is a real position in the file.
        counter += bytes.length
    }
}
/**
 * Reports whether the given bytes can be decoded by the script's shared
 * decoder `cs`.
 *
 * @param input bytes to check
 * @return true when decoding completes, false when the decoder raises a
 *         CharacterCodingException
 */
boolean isValidUTF8(byte[] input) {
    boolean decodable = true
    try {
        // Only success or failure matters; the decoded characters are discarded.
        cs.decode(ByteBuffer.wrap(input))
    } catch (CharacterCodingException ignored) {
        decodable = false
    }
    decodable
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment