Skip to content

Instantly share code, notes, and snippets.

@aarondai
Created July 16, 2014 15:43
Show Gist options
  • Save aarondai/7b9bc87d6154b9b7eb13 to your computer and use it in GitHub Desktop.
Save aarondai/7b9bc87d6154b9b7eb13 to your computer and use it in GitHub Desktop.
Java:Tika:Sample
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Command-line sample that runs a document through Apache Tika.
 *
 * <p>The document's body content is written to {@code System.out} as it is
 * parsed, followed by one "Metadata Name" / "Metadata Value" line pair for
 * every metadata entry that has a non-null value.
 *
 * <p>Usage: {@code java Sample [path]}. When no path is given, the file
 * {@code sample.pdf} in the current working directory is used.
 */
public class Sample {

    /** Document parsed when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "sample.pdf";

    /**
     * Entry point: parses one document and prints its content and metadata.
     *
     * <p>I/O, Tika and SAX failures are reported via {@code printStackTrace()}
     * and do not propagate out of this method.
     *
     * @param args optional; {@code args[0]} is the path of the document to
     *             parse. Defaults to {@code sample.pdf} when absent.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        InputStream is = null;
        try {
            is = new BufferedInputStream(new FileInputStream(path));
            Metadata metadata = parse(is);
            printMetadata(metadata);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TikaException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } finally {
            // The stream is still null if opening the file failed.
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Parses the stream with an {@link AutoDetectParser}, sending body content
     * to {@code System.out} through a {@link BodyContentHandler}.
     *
     * @param in the document stream; the caller remains responsible for closing it
     * @return the metadata object populated during parsing
     * @throws IOException   if the parser reports an I/O failure
     * @throws SAXException  if the parser reports a SAX failure
     * @throws TikaException if the parser reports a Tika failure
     */
    private static Metadata parse(InputStream in)
            throws IOException, SAXException, TikaException {
        Parser parser = new AutoDetectParser();
        ContentHandler handler = new BodyContentHandler(System.out);
        Metadata metadata = new Metadata();
        parser.parse(in, handler, metadata, new ParseContext());
        return metadata;
    }

    /**
     * Prints a name line and a value line for each metadata entry, skipping
     * entries whose value is {@code null}.
     *
     * @param metadata the metadata to print
     */
    private static void printMetadata(Metadata metadata) {
        for (String name : metadata.names()) {
            // NOTE(review): get(name) yields a single value per name; if a name
            // can carry several values, only one is printed here. Confirm
            // against the Tika Metadata API before relying on this output.
            String value = metadata.get(name);
            if (value != null) {
                System.out.println("Metadata Name: " + name);
                System.out.println("Metadata Value: " + value);
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment