Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
// Open the Word document (.docx); try-with-resources closes the parser on exit
try (Parser docParser = new Parser("sample.docx")) {
    // Request the metadata items of the document
    Iterable<MetadataItem> items = docParser.getMetadata();
    // A null result is reported as "metadata extraction not supported"
    if (items == null) {
        System.out.println("Metadata extraction isn't supported.");
        return;
    }
    // Write every item to standard output as "name = value"
    for (MetadataItem entry : items) {
        String line = String.format("%s = %s", entry.getName(), entry.getValue());
        System.out.println(line);
    }
}
// Build the factory that hands out extractors
ExtractorFactory factory = new ExtractorFactory();
// Ask the factory for a metadata extractor matching the file at filePath
MetadataExtractor extractor = factory.createMetadataExtractor(filePath);
// Run the extractor against the same file to collect its metadata
MetadataCollection metadata = extractor.extractMetadata(filePath);
// Write every entry to standard output as "key = value"
for (String name : metadata.getKeys()) {
    System.out.println(String.format("%s = %s", name, metadata.get_Item(name)));
}
// Create an instance of Parser class
try (Parser parser = new Parser("sample.pdf")) {
// Extract a text from PDF document to the reader
try (TextReader reader = parser.getText()) {
// Check if text extraction is supported
if (reader == null) {
System.out.println("Text extraction isn't supported.");
return;
}
// Extract a text from the reader
String textLine = null;
do {
textLine = reader.readLine();
if (textLine != null) {
System.out.println(textLine);
}
}
while (textLine != null);
}
}
// Build the factory that hands out extractors
ExtractorFactory factory = new ExtractorFactory();
// Obtain a text extractor for the PDF; try-with-resources closes it on exit
try (TextExtractor textExtractor = factory.createTextExtractor("sample.pdf")) {
    // Print the text line by line until extractLine() returns null
    String line;
    while ((line = textExtractor.extractLine()) != null) {
        System.out.println(line);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.