This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Gather the combined text of every <p> element found under .story-body
// (the article body on a BBC News page) into a single string.
// NOTE(review): assumes `$` is jQuery (or a compatible API) already in scope — confirm.
let $article = $('.story-body').find('p').text();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Select the <p> elements under .story-body; this keeps the matched
// collection itself rather than extracting its text.
// NOTE(review): assumes `$` is jQuery (or a compatible API) already in scope — confirm.
let $article = $('.story-body').find('p');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function TFIDF(documents){ | |
// calculates TF*IDF | |
const TFVals = termFrequency(documents); | |
const IDFVals = inverseDocumentFrequency(documents); | |
let TFidfDict = {}; | |
for (const [key, value] of Object.entries(TFVals)){ | |
if (key in IDFVals){ | |
TFidfDict[key] = TFVals[key] * IDFVals[key]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function TFIDF(documents){ | |
// calculates TF*IDF | |
const TFVals = termFrequency(documents); | |
const IDFVals = inverseDocumentFrequency(documents); | |
let TFidfDict = {}; | |
for (const [key, value] of Object.entries(TFVals)){ | |
if (key in IDFVals){ | |
TFidfDict[key] = TFVals[key] * IDFVals[key]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function TFIDF(documents){ | |
// calculates TF*IDF | |
const TFVals = termFrequency(documents); | |
const IDFVals = inverseDocumentFrequency(documents); | |
let TFidfDict = {}; | |
for (const [key, value] of Object.entries(TFVals)){ | |
if (key in IDFVals){ | |
TFidfDict[key] = TFVals[key] * IDFVals[key]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function inverseDocumentFrequency(document){ | |
// calculates the inverse document frequency of every sentence | |
const words_without_stopwords = prettify(document); | |
const unique_words_set = uniqueWords(words_without_stopwords); | |
const sentences = document.split(".").map(item => item.trim()); | |
sentences[0] = sentences[0].substring(146); | |
const lengthOfDocuments = sentences.length; | |
// prettifys each sentence so it doesn't have stopwords |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function inverseDocumentFrequency(documents){ | |
// calculates the inverse document frequency of every sentence | |
const words_without_stopwords = prettify(documents); | |
const sentences = documents.split(".") | |
sentences[0] = sentences[0].substring(146); | |
const lengthOfDocuments = sentences.length; | |
const WordCountDocuments = countWords(words_without_stopwords); | |
// calculate TF values of all documents |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function inverseDocumentFrequency(document){ | |
// calculates the inverse document frequency of every sentence | |
const words_without_stopwords = prettify(document); | |
const unique_words_set = uniqueWords(words_without_stopwords); | |
const sentences = document.split(".").map(item => item.trim()); | |
sentences[0] = sentences[0].substring(146); | |
const lengthOfDocuments = sentences.length; | |
// prettifys each sentence so it doesn't have stopwords |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function termFrequency(document){ | |
// calculates term frequency of each sentence | |
words_without_stopwords = prettify(document); | |
// gets rid of trailing spaces | |
const sentences = document.split(".").map(item => item.trim()); | |
sentences[0] = sentences[0].substring(146); | |
const TFVals = countWords(words_without_stopwords) | |
const unique_words = uniqueWords(words_without_stopwords); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function termFrequency(document){ | |
// calculates term frequency of each sentence | |
words_without_stopwords = prettify(document); | |
// gets rid of trailing spaces | |
const sentences = document.split(".").map(item => item.trim()); | |
sentences[0] = sentences[0].substring(146); | |
const TFVals = countWords(words_without_stopwords) | |
const unique_words = uniqueWords(words_without_stopwords); |