Created
January 18, 2016 16:15
-
-
Save nhojpatrick/ac01a3b3d791364b26f8 to your computer and use it in GitHub Desktop.
TikaDetectTester for the Tika user mailing list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
public class TikaDetectTester { | |
public static void main(final String[] args) throws Exception { | |
final FileInputStream fis = new FileInputStream(args[0]); | |
final BufferedInputStream bis = new BufferedInputStream(fis); | |
bis.mark(Integer.MAX_VALUE - 8); | |
final int bufferSize = bis.available(); | |
try (final FileOutputStream fileOutputStream = new FileOutputStream(args[0] + ".out1");) { | |
byte[] buffer = new byte[bufferSize]; | |
int noOfBytes = 0; | |
while ((noOfBytes = bis.read(buffer)) != -1) { | |
fileOutputStream.write(buffer, 0, noOfBytes); | |
} | |
} | |
bis.reset(); | |
final TikaInputStream tis = TikaInputStream.get(bis); | |
final TikaConfig tikaConfig = TikaConfig.getDefaultConfig(); | |
final Detector tikaDetector = tikaConfig.getDetector(); | |
final Metadata metadata = new Metadata(); | |
final MediaType detected = tikaDetector.detect(tis, metadata); | |
System.out.println("Tika Detected Media Type = " + String.valueOf(detected) + ";"); | |
try (final InputStream inputStream = bis; | |
final FileOutputStream fileOutputStream = new FileOutputStream(args[0] + ".out2");) { | |
byte[] buffer = new byte[bufferSize]; | |
int noOfBytes = 0; | |
while ((noOfBytes = bis.read(buffer)) != -1) { | |
fileOutputStream.write(buffer, 0, noOfBytes); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment