Skip to content

Instantly share code, notes, and snippets.

@masud-technope
Created July 13, 2018 20:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save masud-technope/a87484a1689b603a820b91def75882b2 to your computer and use it in GitHub Desktop.
Save masud-technope/a87484a1689b603a820b91def75882b2 to your computer and use it in GitHub Desktop.
Lucene TF calculation
/** Name of the index field whose term frequencies are looked up by {@code calculateTF()}. */
public static final String FIELD_CONTENTS = "contents";
/**
 * Computes the total term frequency of every indexed term with respect to
 * the {@link #FIELD_CONTENTS} field.
 *
 * <p>The method opens the index stored under {@code indexFolder}, adds the
 * terms of every indexed field to {@code this.keys}, and then asks the reader
 * for the total term frequency of each key in {@link #FIELD_CONTENTS}. Each
 * frequency recorded in the result is also added to
 * {@code totalTermFreqCorpus}.
 *
 * <p>This is a best-effort operation: if the index cannot be read, the failure
 * is logged and whatever was collected up to that point is returned.
 *
 * @return a map from term text to its total term frequency in
 *         {@link #FIELD_CONTENTS}; never {@code null}, possibly empty
 */
public HashMap<String, Long> calculateTF() {
    HashMap<String, Long> termFreqMap = new HashMap<>();
    // try-with-resources guarantees the reader (and the index files it holds
    // open) is released even when a lookup fails half-way through.
    try (IndexReader reader = DirectoryReader.open(FSDirectory
            .open(new File(indexFolder).toPath()))) {
        Fields fields = MultiFields.getFields(reader);
        // An index without any postings may expose no fields at all.
        if (fields != null) {
            for (String field : fields) {
                Terms terms = fields.terms(field);
                if (terms == null) {
                    // Field exists but carries no terms; nothing to collect.
                    continue;
                }
                TermsEnum termsEnum = terms.iterator();
                BytesRef bytesRef;
                // next() already positions the enum on the returned term, so no
                // additional seekExact() is required before reading it.
                while ((bytesRef = termsEnum.next()) != null) {
                    this.keys.add(bytesRef.utf8ToString());
                }
            }
        }
        for (String term : this.keys) {
            if (termFreqMap.containsKey(term)) {
                // Already counted; avoids a redundant index lookup and keeps
                // the corpus total from being inflated by duplicate keys.
                continue;
            }
            long totalTermFreq = reader.totalTermFreq(new Term(FIELD_CONTENTS, term));
            termFreqMap.put(term, totalTermFreq);
            totalTermFreqCorpus += totalTermFreq;
        }
    } catch (Exception exc) {
        // Still best-effort (callers receive the partial map), but the failure
        // is reported instead of being silently discarded.
        java.util.logging.Logger.getLogger(getClass().getName()).log(
                java.util.logging.Level.WARNING,
                "Failed to calculate term frequencies from index: " + indexFolder, exc);
    }
    return termFreqMap;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment