Skip to content

Instantly share code, notes, and snippets.

@ravi07bec
Created November 26, 2021 06:20
Show Gist options
  • Save ravi07bec/0951886fadd7dbe224a2fefd39bc5ec3 to your computer and use it in GitHub Desktop.
Save ravi07bec/0951886fadd7dbe224a2fefd39bc5ec3 to your computer and use it in GitHub Desktop.
#Term Frequency
def termfreq(document, word):
    """Return the relative frequency of ``word`` within ``document``.

    Args:
        document: Sized iterable of tokens (for example a list of strings).
        word: Token to count; tokens are compared to it with ``==``.

    Returns:
        The number of tokens equal to ``word`` divided by ``len(document)``,
        or ``0.0`` when ``document`` is empty.
    """
    N = len(document)
    if N == 0:
        # An empty document contains no terms; avoid dividing by zero.
        return 0.0
    # Count matches lazily instead of materializing a list just to measure it.
    occurrences = sum(1 for token in document if token == word)
    return occurrences / N
#Inverse Document Frequency
def inverse_doc_freq(word):
    """Return the smoothed inverse document frequency of ``word``.

    Reads the module-level names ``word_count`` and ``total_documents``.
    NOTE(review): presumably ``word_count`` maps each vocabulary word to the
    number of documents containing it -- confirm against where it is built.

    Args:
        word: Token to look up in ``word_count``.

    Returns:
        ``np.log(total_documents / (word_count[word] + 1))``. A word missing
        from ``word_count`` is treated as having a count of zero, so the
        denominator is 1.
    """
    try:
        # The +1 keeps the denominator non-zero for a stored count of 0.
        word_occurance = word_count[word] + 1
    except KeyError:
        # Only an unseen word falls back to 1; other errors (for example a
        # missing ``word_count``) are allowed to propagate.
        word_occurance = 1
    return np.log(total_documents / word_occurance)
#Combining the functions
def tf_idf(sentence):
    """Build the TF-IDF vector for one tokenized sentence.

    Reads the module-level names ``word_set`` (its length sets the vector
    size) and ``index_dict`` (token -> position in the vector), and calls
    ``termfreq`` and ``inverse_doc_freq`` for each distinct token.

    Args:
        sentence: Sized iterable of hashable tokens.

    Returns:
        A 1-D NumPy array of length ``len(word_set)``. The slot
        ``index_dict[token]`` holds ``termfreq * inverse_doc_freq`` for each
        token of ``sentence`` present in ``index_dict``; all other slots
        stay 0. Tokens absent from ``index_dict`` are skipped.
    """
    tf_idf_vec = np.zeros((len(word_set),))
    # A repeated token always yields the same value for the same slot, so
    # visit each distinct token once (dict.fromkeys keeps first-seen order).
    for word in dict.fromkeys(sentence):
        if word not in index_dict:
            # Out-of-vocabulary token: it has no slot in the vector.
            continue
        tf = termfreq(sentence, word)
        idf = inverse_doc_freq(word)
        tf_idf_vec[index_dict[word]] = tf * idf
    return tf_idf_vec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment