Skip to content

Instantly share code, notes, and snippets.

@GroupDocsGists
Last active November 21, 2023 11:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GroupDocsGists/51072633130fc578caafd9b79628cfb1 to your computer and use it in GitHub Desktop.
Save GroupDocsGists/51072633130fc578caafd9b79628cfb1 to your computer and use it in GitHub Desktop.
Efficient Text Search: Handling Multiple Files and Folders using Java
// Print the search results: for every found document, its total occurrence
// count followed by a per-field breakdown of the matched terms and phrases.
// NOTE: this fragment expects `result` (the value returned by index.search(...))
// to already be in scope.
for (int i = 0; i < result.getDocumentCount(); i++) {
    FoundDocument document = result.getFoundDocument(i);
    System.out.println("Occurrences: " + document.getOccurrenceCount());

    for (FoundDocumentField field : document.getFoundFields()) {
        System.out.println("\tField: " + field.getFieldName());
        System.out.println("\tOccurrences: " + field.getOccurrenceCount());

        // Found terms: entry k of getTerms() is printed next to entry k of
        // getTermsOccurrences(). The getter may return null, hence the guard.
        if (field.getTerms() != null) {
            for (int k = 0; k < field.getTerms().length; k++) {
                System.out.println("\t\t" + field.getTerms()[k] + " - " + field.getTermsOccurrences()[k]);
            }
        }

        // Found phrases: each sequence is an array of words. Joining with a
        // single space (instead of appending "word " in a loop) avoids the
        // stray trailing space that used to produce "phrase  - count".
        String[][] sequences = field.getTermSequences();
        if (sequences != null) {
            for (int k = 0; k < sequences.length; k++) {
                String phrase = String.join(" ", sequences[k]);
                System.out.println("\t\t" + phrase + " - " + field.getTermSequencesOccurrences()[k]);
            }
        }
    }
}
// Search text across files of various formats located in multiple folders,
// then save an HTML copy of each matching document with the hits highlighted.

// Create the index in the given folder
Index index = new Index("path/for/indexingFolder");

// Index the documents found under the given parent folder
index.add("path/parent-folder/");

// Run the query against the index
String query = "water OR \"Lorem ipsum\" OR non";
SearchResult result = index.search(query);

for (int docNumber = 0; docNumber < result.getDocumentCount(); docNumber++) {
    FoundDocument found = result.getFoundDocument(docNumber);

    System.out.println("====================================");
    System.out.println("File Name: " + found.getDocumentInfo().getFilePath());

    // One output file per found document, numbered by its position in the result
    String highlightedPath = "/path/Highlighted-" + docNumber + ".html";
    OutputAdapter htmlOutput = new FileOutputAdapter(OutputFormat.Html, highlightedPath);
    Highlighter htmlHighlighter = new DocumentHighlighter(htmlOutput);
    index.highlight(found, htmlHighlighter);
}
// Text search in multiple files of various formats within multiple folders:
// index a folder tree, run a query, write a highlighted HTML copy of every
// matching document, and print a per-field breakdown of what was found.

// Creating an index in the specified folder
Index index = new Index("path/for/indexingFolder");

// Indexing documents from the specified folder
index.add("path/parent-folder/");

String query = "water OR \"Lorem ipsum\" OR non";
SearchResult result = index.search(query);

for (int i = 0; i < result.getDocumentCount(); i++) {
    FoundDocument document = result.getFoundDocument(i);
    System.out.println("====================================");
    System.out.println("File Name: " + document.getDocumentInfo().getFilePath());

    // Write the highlighted document to its own HTML file, numbered by result position
    OutputAdapter outputAdapter = new FileOutputAdapter(OutputFormat.Html, "/path/Highlighted-" + i + ".html");
    Highlighter highlighter = new DocumentHighlighter(outputAdapter);
    index.highlight(document, highlighter);

    // Printing search results for this document
    System.out.println("Occurrences: " + document.getOccurrenceCount());
    for (FoundDocumentField field : document.getFoundFields()) {
        System.out.println("\tField: " + field.getFieldName());
        System.out.println("\tOccurrences: " + field.getOccurrenceCount());

        // Found terms: entry k of getTerms() is printed next to entry k of
        // getTermsOccurrences(). The getter may return null, hence the guard.
        if (field.getTerms() != null) {
            for (int k = 0; k < field.getTerms().length; k++) {
                System.out.println("\t\t" + field.getTerms()[k] + " - " + field.getTermsOccurrences()[k]);
            }
        }

        // Found phrases: each sequence is an array of words. Joining with a
        // single space (instead of appending "word " in a loop) avoids the
        // stray trailing space that used to produce "phrase  - count".
        String[][] sequences = field.getTermSequences();
        if (sequences != null) {
            for (int k = 0; k < sequences.length; k++) {
                String phrase = String.join(" ", sequences[k]);
                System.out.println("\t\t" + phrase + " - " + field.getTermSequencesOccurrences()[k]);
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment