Related Article(s):
Created
November 24, 2023 19:00
-
-
Save GroupDocsGists/fcc9f3479db3fce3158b4d91aa7a5231 to your computer and use it in GitHub Desktop.
Regex Search in Files across Folders with Java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Regex search over multiple files across folders using Java.

// Create an index stored in the given folder, then add the documents' parent folder to it.
Index index = new Index("path/indexing-folder-path");
index.add("path/parent-folder");

// Prepare the regex query and run the search.
// The pattern captures a single character and requires it to be immediately
// repeated one or more times, i.e. it targets runs of identical consecutive characters.
// NOTE(review): in GroupDocs.Search the leading caret reportedly marks the query as a
// regular expression rather than acting as a start-of-input anchor, and the vendor docs
// describe this pattern as matching repeats at the beginning of a word - confirm.
String query = "^(.)\\1{1,}";
SearchResult result = index.search(query);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Highlighting and Printing Regex Search Results for all the documents | |
for (int i = 0 ; i < result.getDocumentCount(); i++) | |
{ | |
FoundDocument document = result.getFoundDocument(i); | |
OutputAdapter outputAdapter = new FileOutputAdapter(OutputFormat.Html, "path/Highlight" + i + ".html"); | |
Highlighter highlighter = new DocumentHighlighter(outputAdapter); | |
index.highlight(document, highlighter); | |
System.out.println("\tDocument: " + document.getDocumentInfo().getFilePath()); | |
System.out.println("\tOccurrences: " + document.getOccurrenceCount()); | |
for (FoundDocumentField field : document.getFoundFields()) { | |
System.out.println("\t\tField: " + field.getFieldName()); | |
System.out.println("\t\tOccurrences: " + field.getOccurrenceCount()); | |
// Printing found terms | |
if (field.getTerms() != null) { | |
for (int k = 0; k < field.getTerms().length; k++) { | |
System.out.println("\t\t\t" + field.getTerms()[k] + " - " + field.getTermsOccurrences()[k]); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment