Last active
May 11, 2021 09:20
-
-
Save conholdate-gists/19b2660177df759b284e9442403e92cf to your computer and use it in GitHub Desktop.
Search for a Word in PDF using GroupDocs.Search using Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Search for a Word in PDF using Java
1. Create an Index using Java
2. Subscribe to index events
3. Add files to the index
4. Define search criteria
5. Perform search operation
6. Print search results
7. Highlight search results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Step 1-3: create (or open) the index, subscribe to its error events, and add documents.
String indexFolder = "C:\\Index\\"; // Path to the index folder
String documentsFolder = "C:\\Files\\"; // Path to a folder containing documents to search

// Create a new index in the folder, or open the existing one.
Index index = new Index(indexFolder);

// Subscribe to index events: every reported error message is written to the console.
index.getEvents().ErrorOccurred.add(new EventHandler<IndexErrorEventArgs>() {
    public void invoke(Object sender, IndexErrorEventArgs args) {
        System.out.println(args.getMessage());
    }
});

// Add files synchronously: index the documents from the specified folder.
index.add(documentsFolder);
// Step 4-5: define the search query and run it against the index.
String query = "elementum"; // The word to search for
SearchResult result = index.search(query);

// Step 6: print the totals, then the file path and occurrence count of each found document.
int documentCount = result.getDocumentCount();
System.out.println("Documents found: " + documentCount);
System.out.println("Total occurrences found: " + result.getOccurrenceCount());
for (int i = 0; i < documentCount; i++) {
    FoundDocument document = result.getFoundDocument(i);
    System.out.println("\tDocument: " + document.getDocumentInfo().getFilePath());
    System.out.println("\tOccurrences: " + document.getOccurrenceCount());
}
// Step 7: highlight occurrences in the text of the first found document.
// Skipped entirely when the search found no documents.
if (result.getDocumentCount() > 0) {
    FoundDocument document = result.getFoundDocument(0); // The first found document
    String path = "C:\\Output\\Highlighted.html";

    // The highlighter writes its HTML output through a file-backed output adapter.
    OutputAdapter outputAdapter = new FileOutputAdapter(path);
    HtmlHighlighter highlighter = new HtmlHighlighter(outputAdapter);

    // Generate the HTML-formatted document with highlighted search results.
    index.highlight(document, highlighter);

    // Tell the user where the generated file is.
    System.out.println();
    System.out.println("Generated HTML file can be opened with Internet browser.");
    System.out.println("The file can be found by the following path:");
    System.out.println(Paths.get(path).toAbsolutePath().toString());
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment