Related Article(s):
Last active
November 21, 2023 11:52
Efficient Text Search: Handling Multiple Files and Folders using Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Printing Search Results === | |
for (int i = 0 ; i < result.getDocumentCount(); i++) | |
{ | |
FoundDocument document = result.getFoundDocument(i); | |
// Printing Search Results =========== | |
System.out.println("Occurrences: " + document.getOccurrenceCount()); | |
for (FoundDocumentField field : document.getFoundFields()) { | |
System.out.println("\tField: " + field.getFieldName()); | |
System.out.println("\tOccurrences: " + field.getOccurrenceCount()); | |
// Printing found terms | |
if (field.getTerms() != null) { | |
for (int k = 0; k < field.getTerms().length; k++) { | |
System.out.println("\t\t" + field.getTerms()[k] + " - " + field.getTermsOccurrences()[k]); | |
} | |
} | |
// Printing found phrases | |
if (field.getTermSequences() != null) { | |
for (int k = 0; k < field.getTermSequences().length; k++) { | |
String[] terms = field.getTermSequences()[k]; | |
String sequence = ""; | |
for (String term : terms) { | |
sequence += term + " "; | |
} | |
System.out.println("\t\t" + sequence + " - " + field.getTermSequencesOccurrences()[k]); | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Text search over the documents of a folder; for every document in the result,
// prints its file path and writes an HTML copy with the hits highlighted.

// The index is created in this folder
Index index = new Index("path/for/indexingFolder");
// The documents of this folder are added to the index
index.add("path/parent-folder/");

// Query text handed to the index as-is
String query = "water OR \"Lorem ipsum\" OR non";
SearchResult result = index.search(query);

int docIndex = 0;
while (docIndex < result.getDocumentCount()) {
    FoundDocument document = result.getFoundDocument(docIndex);

    System.out.println("====================================");
    System.out.println("File Name: " + document.getDocumentInfo().getFilePath());

    // One HTML file per document, numbered by its position in the result
    String highlightedPath = "/path/Highlighted-" + docIndex + ".html";
    OutputAdapter htmlOutput = new FileOutputAdapter(OutputFormat.Html, highlightedPath);
    Highlighter documentHighlighter = new DocumentHighlighter(htmlOutput);
    index.highlight(document, documentHighlighter);

    docIndex++;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Text Search in Multiple files of various file formats within Multiple Folders using Java
// Searches the indexed documents; for every document in the result, writes an
// HTML copy with the hits highlighted and prints a per-field breakdown of the
// found terms and phrases.

// Creating an index in the specified folder
Index index = new Index("path/for/indexingFolder");
// Indexing documents from the specified folder
index.add("path/parent-folder/");

String query = "water OR \"Lorem ipsum\" OR non";
SearchResult result = index.search(query);

for (int i = 0; i < result.getDocumentCount(); i++) {
    FoundDocument document = result.getFoundDocument(i);
    System.out.println("====================================");
    System.out.println("File Name: " + document.getDocumentInfo().getFilePath());

    // One highlighted HTML file per document, numbered by its position in the result
    OutputAdapter outputAdapter = new FileOutputAdapter(OutputFormat.Html, "/path/Highlighted-" + i + ".html");
    Highlighter highlighter = new DocumentHighlighter(outputAdapter);
    index.highlight(document, highlighter);

    // Printing Search Results
    System.out.println("Occurrences: " + document.getOccurrenceCount());
    for (FoundDocumentField field : document.getFoundFields()) {
        System.out.println("\tField: " + field.getFieldName());
        System.out.println("\tOccurrences: " + field.getOccurrenceCount());

        // Printing found terms
        // Each array is read once; foundTerms[k] is paired with termCounts[k].
        String[] foundTerms = field.getTerms();
        if (foundTerms != null) {
            int[] termCounts = field.getTermsOccurrences();
            for (int k = 0; k < foundTerms.length; k++) {
                System.out.println("\t\t" + foundTerms[k] + " - " + termCounts[k]);
            }
        }

        // Printing found phrases
        // A phrase is an array of words; phrases[k] is paired with phraseCounts[k].
        String[][] phrases = field.getTermSequences();
        if (phrases != null) {
            int[] phraseCounts = field.getTermSequencesOccurrences();
            for (int k = 0; k < phrases.length; k++) {
                // Build the output line in one buffer instead of repeated String +=.
                // Every word is followed by a space, so the printed text is unchanged.
                StringBuilder line = new StringBuilder("\t\t");
                for (String word : phrases[k]) {
                    line.append(word).append(' ');
                }
                line.append(" - ").append(phraseCounts[k]);
                System.out.println(line.toString());
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment