Skip to content

Instantly share code, notes, and snippets.

@madneal
Created August 25, 2017 03:23
Show Gist options
  • Save madneal/c792c9470dbd7200e442b65633408b44 to your computer and use it in GitHub Desktop.
Save madneal/c792c9470dbd7200e442b65633408b44 to your computer and use it in GitHub Desktop.
String docsPath = "inputFiles";
String indexPath = "indexedFiles";
Path docDir = Paths.get(docsPath);
Directory dir = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig();
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
IndexWriter writer = new IndexWriter(dir, iwc);
if (writer != null) {
writer.deleteAll();
}
writer.close();
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
QueryParser qp = new QueryParser("contents", analyzer);
Query query = qp.parse("begin");
TopDocs hits = searcher.search(query, 10);
Formatter formatter = new SimpleHTMLFormatter();
QueryScorer scorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, scorer);
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, 10);
highlighter.setTextFragmenter(fragmenter);
for (int i = 0; i < hits.scoreDocs.length; i ++) {
int docid = hits.scoreDocs[i].doc;
Document doc = searcher.doc(docid);
String title = doc.get("path");
System.out.println("Path " + " : " + title);
String text = doc.get("contents");
TokenStream stream = TokenSources.getAnyTokenStream(reader, docid, "contents", analyzer);
String[] frags = highlighter.getBestFragments(stream, text, 10);
for (String frag : frags) {
System.out.println("=========================");
System.out.println(frag);
}
}
dir.close();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment