Last active
October 4, 2023 12:39
-
-
Save guaracyalima/6da5134e1edfeb125378cbdd7008ef98 to your computer and use it in GitHub Desktop.
O trem la de extrair os dados dos malucos das apolices
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package pdxtractor; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import org.apache.tika.exception.TikaException; | |
import org.apache.tika.metadata.Metadata; | |
import org.apache.tika.parser.ParseContext; | |
import org.apache.tika.parser.pdf.PDFParser; | |
import org.apache.tika.sax.BodyContentHandler; | |
import org.apache.tika.sax.xpath.XPathParser; | |
import org.xml.sax.SAXException; | |
public class Main { | |
public static void main(String[] args) throws IOException, TikaException, SAXException{ | |
BodyContentHandler handler = new BodyContentHandler(); | |
Metadata metadata = new Metadata(); | |
FileInputStream inputstream = new FileInputStream(new File("/Users/guabirabadev/Desktop/apolices/exibePdf.pdf")); | |
ParseContext pcontext = new ParseContext(); | |
PDFParser pdfparser = new PDFParser(); | |
pdfparser.parse(inputstream, handler, metadata, pcontext); | |
String xpto = handler.toString(); | |
String searchTerm = "Franquias (R$)"; | |
xpto.toLowerCase().contains(searchTerm.toLowerCase()); | |
String[] metadataNames = metadata.names(); | |
for(String name : metadataNames) { | |
System.out.println(name+ " : " + metadata.get(name)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment