Skip to content

Instantly share code, notes, and snippets.

@bee-san
Created August 31, 2018 10:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bee-san/da5efa92b21a73032c84916f236afc99 to your computer and use it in GitHub Desktop.
Save bee-san/da5efa92b21a73032c84916f236afc99 to your computer and use it in GitHub Desktop.
function inverseDocumentFrequency(document){
// calculates the inverse document frequency of every sentence
const words_without_stopwords = prettify(document);
const unique_words_set = uniqueWords(words_without_stopwords);
const sentences = document.split(".").map(item => item.trim());
sentences[0] = sentences[0].substring(146);
const lengthOfDocuments = sentences.length;
// prettifys each sentence so it doesn't have stopwords
const wordCountAll = countWords(words_without_stopwords);
// counts words of each sentence
// as each sentence is a document
wordCountSentences = [];
for (let i = 0; i <= lengthOfDocuments - 1; i ++){
wordCountSentences.push(countWords(prettify(sentences[i])));
}
// calculate TF values of all documents
let IDFVals = {};
// how many times that word appears in all sentences (documents)
wordCountSentencesLength = wordCountSentences.length;
// for every unique word
for (let i = 0; i <= unique_words_set.length - 1; i++){
let temp_add = 0;
// count how many times unique word appears in all sentences
for (let x = 0; x <= wordCountSentencesLength - 1; x++){
if (unique_words_set[i] in wordCountSentences[x]){
temp_add =+ 1;
}
}
IDFVals[unique_words_set[i]] = Math.log10(wordCountAll[unique_words_set[i]] / temp_add);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment