Skip to content

Instantly share code, notes, and snippets.

@kellyrob99
Created October 21, 2011 07:23
Show Gist options
  • Save kellyrob99/1303290 to your computer and use it in GitHub Desktop.
Save kellyrob99/1303290 to your computer and use it in GitHub Desktop.
CharsetDetector task for Gradle, utilizes juniversalchardet
import org.mozilla.universalchardet.UniversalDetector
// Build-script classpath: makes UniversalDetector available to the task class
// defined later in this script.
buildscript {
    repositories {
        mavenCentral()
    }
    dependencies {
        // The detector library is picked up from a jar file next to the build
        // script (the original author notes it was not available in a public
        // repository).
        classpath(files('juniversalchardet-1.0.3.jar'))
    }
}
// Declares a CharsetDetector task named 'findEncoding' that inspects 'someFile'.
task('findEncoding', type: CharsetDetector) {
    inputFile = file('someFile')
}
/**
 * Gradle task that feeds the bytes of {@link #inputFile} to juniversalchardet's
 * UniversalDetector and prints the detected character encoding, if any.
 */
class CharsetDetector extends DefaultTask
{
    /** The file whose character encoding should be detected. */
    @InputFile
    File inputFile

    /**
     * Task action. Port of the code sample on
     * http://code.google.com/p/juniversalchardet/, with the input stream
     * closed deterministically once reading finishes (or fails).
     *
     * Prints "Detected encoding = ..." when the detector reports a charset and
     * "No encoding detected." otherwise.
     */
    @TaskAction
    def detect()
    {
        byte[] buf = new byte[4096]

        // (1) create the detector
        UniversalDetector detector = new UniversalDetector(null)

        // (2) feed data until the file is exhausted or the detector is done.
        // withInputStream closes the stream even when the closure throws,
        // so the file handle is never leaked.
        inputFile.withInputStream { InputStream stream ->
            int nread
            while ((nread = stream.read(buf)) > 0 && !detector.isDone())
            {
                detector.handleData(buf, 0, nread)
            }
        }

        // (3) tell the detector that no more data will follow
        detector.dataEnd()

        // (4) read the result; Groovy truth treats null or '' as "nothing detected"
        String encoding = detector.getDetectedCharset()
        if (encoding)
        {
            println("Detected encoding = " + encoding)
        }
        else
        {
            println("No encoding detected.")
        }

        // (5) reset the detector, as in the original sample
        detector.reset()
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment