Skip to content

Instantly share code, notes, and snippets.

@guaracyalima
Last active October 4, 2023 12:39
Show Gist options
  • Save guaracyalima/6da5134e1edfeb125378cbdd7008ef98 to your computer and use it in GitHub Desktop.
Save guaracyalima/6da5134e1edfeb125378cbdd7008ef98 to your computer and use it in GitHub Desktop.
O trem lá de extrair os dados dos malucos das apólices
package pdxtractor;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Locale;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.xml.sax.SAXException;
/**
 * Command-line entry point that parses a PDF with Apache Tika, reports whether the
 * extracted text contains a fixed search term, and prints every metadata entry.
 */
public class Main {

    /** PDF that is parsed when no path is supplied on the command line. */
    private static final String DEFAULT_PDF_PATH = "/Users/guabirabadev/Desktop/apolices/exibePdf.pdf";

    /** Text looked up (case-insensitively) in the extracted PDF body. */
    private static final String SEARCH_TERM = "Franquias (R$)";

    /**
     * Parses the PDF, prints whether {@link #SEARCH_TERM} occurs in its text, then prints
     * each metadata entry as {@code name : value}.
     *
     * @param args optional; {@code args[0]} is the path of the PDF to parse. When absent,
     *             {@link #DEFAULT_PDF_PATH} is used.
     * @throws IOException   if the file cannot be opened or read
     * @throws TikaException if Tika fails to parse the document
     * @throws SAXException  if the content handler rejects the parsed content
     */
    public static void main(String[] args) throws IOException, TikaException, SAXException {
        String pdfPath = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF_PATH;

        // -1 disables the handler's default character write limit, so long documents
        // do not abort the parse with a SAXException.
        BodyContentHandler handler = new BodyContentHandler(-1);
        Metadata metadata = new Metadata();
        ParseContext pcontext = new ParseContext();
        PDFParser pdfparser = new PDFParser();

        // try-with-resources guarantees the stream is closed even when parsing fails.
        try (FileInputStream inputstream = new FileInputStream(new File(pdfPath))) {
            pdfparser.parse(inputstream, handler, metadata, pcontext);
        }

        // The original computed this check and discarded the result; report it instead.
        boolean found = containsIgnoreCase(handler.toString(), SEARCH_TERM);
        System.out.println("Contains \"" + SEARCH_TERM + "\" : " + found);

        for (String name : metadata.names()) {
            System.out.println(name + " : " + metadata.get(name));
        }
    }

    /**
     * Case-insensitive substring check using locale-independent lower-casing, so the
     * result does not vary with the JVM's default locale.
     *
     * @param text text to search in
     * @param term text to search for
     * @return {@code true} if {@code term} occurs in {@code text}, ignoring case
     */
    private static boolean containsIgnoreCase(String text, String term) {
        return text.toLowerCase(Locale.ROOT).contains(term.toLowerCase(Locale.ROOT));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment