// Created April 22, 2024 04:28
// Cosine Similarity implemented in TypeScript
// slammed this together after reading an interesting from-scratch series of posts on TF-IDF + Cosine Similarity.
// it will be slow, if it even works. totally untested and full of misguided practices.
// might come back to this some day and make it at least functional.
/**
 * Sparse term vector: a `Map` from term to numeric weight.
 *
 * `order` records the vocabulary set the vector was created against. The
 * ordering helpers below sort by term and do not consult `order`.
 */
class Vector extends Map<string, number> {
  /** Vocabulary passed to `withOrder`, or `null` if the vector was built directly. */
  order: Set<string> | null = null;

  /**
   * Creates an empty vector tagged with the given vocabulary.
   *
   * @param v Vocabulary set stored on the new vector's `order` field.
   * @returns A new, empty vector.
   */
  static withOrder(v: Set<string>): Vector {
    const vector = new Vector();
    vector.order = v;
    return vector;
  }

  /**
   * @returns The `[term, value]` pairs sorted ascending by term, compared with
   * the `<` operator on strings. The map itself is not reordered.
   */
  orderedEntries(): [string, number][] {
    return Array.from(this.entries()).sort(([a], [b]) =>
      a === b ? 0 : a < b ? -1 : 1,
    );
  }

  /** @returns The values in the same order as `orderedEntries()`. */
  orderedValues(): number[] {
    return this.orderedEntries().map(([, value]) => value);
  }

  /**
   * Scales every value in place so the vector has Euclidean (L2) length 1.
   *
   * An empty or all-zero vector is left untouched, because dividing by a zero
   * norm would turn every entry into NaN.
   */
  l2norm(): void {
    let sumOfSquares = 0;
    for (const value of this.values()) {
      // Squaring must be a multiplication; `value ^ 2` would be a bitwise XOR.
      sumOfSquares += value * value;
    }
    const norm = Math.sqrt(sumOfSquares);
    if (norm === 0) return;
    // Overwriting existing keys while iterating a Map adds no new entries.
    for (const [term, value] of this) {
      this.set(term, value / norm);
    }
  }
}
/** Set of term strings; the vocabulary type used throughout this file. */
type Vocabulary = Set<string>;
/**
 * Document-frequency table for a corpus and the inverse-document-frequency
 * (IDF) weights derived from it.
 *
 * The weight of a term is `ln(N / (1 + df))`, where `N` is the length of the
 * document array given to the constructor and `df` is the number of times
 * `updateFrequency` was called for that term. Note that this variant yields 0
 * or a negative weight for terms that appear in most documents.
 */
class InverseDocumentFrequency {
  private readonly vocab: Vocabulary;
  private readonly _weights: Vector;
  private readonly documentFrequency: Vector;
  // Only `length` is read from this array; `any` mirrors the constructor signature.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private readonly docs: CosineSimilarityDocument<any>[];
  /** True when a frequency changed after the weights were last derived. */
  private stale = true;
  /** Value of `docs.length` used for the last weight computation. */
  private weightedDocCount = -1;

  /** Current weights as an array, ordered ascending by term. */
  get weights(): number[] {
    this.refresh();
    return this._weights.orderedValues();
  }

  /**
   * @param docs Corpus array; its length is the `N` of the IDF formula. The
   *   array is kept by reference, so it may be filled in after construction.
   * @param vocab Vocabulary set used to tag the internal vectors.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  constructor(docs: CosineSimilarityDocument<any>[], vocab: Vocabulary) {
    this.docs = docs;
    this.vocab = vocab;
    this._weights = Vector.withOrder(this.vocab);
    this.documentFrequency = Vector.withOrder(this.vocab);
  }

  /**
   * Records that one more document contains `term`.
   *
   * @param term Term whose document count is incremented by one.
   */
  updateFrequency(term: string): void {
    this.documentFrequency.set(term, (this.documentFrequency.get(term) ?? 0) + 1);
    this.stale = true;
  }

  /** Recomputes the weight of every term that has a recorded frequency. */
  updateWeights(): void {
    const docCount = this.docs.length;
    for (const [term, count] of this.documentFrequency) {
      this._weights.set(term, Math.log(docCount / (1 + count)));
    }
    this.weightedDocCount = docCount;
    this.stale = false;
  }

  /**
   * Looks up the weight of a single term.
   *
   * @param term Term to look up.
   * @returns The term's IDF weight, or 0 for a term with no recorded frequency.
   */
  weightOf(term: string): number {
    this.refresh();
    return this._weights.get(term) ?? 0;
  }

  /** Re-derives the weights if a frequency or the corpus size has changed. */
  private refresh(): void {
    if (this.stale || this.weightedDocCount !== this.docs.length) {
      this.updateWeights();
    }
  }
}

/**
 * One corpus document together with its term-frequency (TF) vector and its
 * lazily computed TF-IDF vector.
 */
class CosineSimilarityDocument<T> {
  private readonly doc: T;
  private readonly tf: Vector;
  private readonly _tfidf: Vector;
  private readonly idf: InverseDocumentFrequency;
  private readonly vocab: Vocabulary;
  private readonly toStringArray: (doc: T) => string[];

  /**
   * TF-IDF vector of this document.
   *
   * It is filled on first access and reused afterwards, so it reflects the IDF
   * weights as they were at that first access.
   */
  get vector(): Vector {
    if (this._tfidf.size === 0) {
      // Weights are looked up by term: the sparse vectors do not share positions.
      for (const [term, count] of this.tf) {
        this._tfidf.set(term, count * this.idf.weightOf(term));
      }
    }
    return this._tfidf;
  }

  /**
   * @param doc The document payload.
   * @param vocab Shared vocabulary set; this document's terms are added to it.
   * @param idf Shared IDF table; notified once per distinct term of this document.
   * @param toStringArray Converts a payload into its list of terms.
   */
  constructor(
    doc: T,
    vocab: Vocabulary,
    idf: InverseDocumentFrequency,
    toStringArray: (doc: T) => string[],
  ) {
    this.doc = doc;
    this.toStringArray = toStringArray;
    this.idf = idf;
    this.vocab = vocab;
    this._tfidf = Vector.withOrder(this.vocab);
    this.tf = this.buildTf();
  }

  /**
   * Counts the occurrences of each term in the document. The first occurrence
   * of a term also registers it in the vocabulary and in the IDF table.
   */
  private buildTf(): Vector {
    const counts = Vector.withOrder(this.vocab);
    for (const term of this.toStringArray(this.doc)) {
      const seen = counts.get(term) ?? 0;
      counts.set(term, seen + 1);
      if (seen === 0) {
        this.vocab.add(term);
        this.idf.updateFrequency(term);
      }
    }
    return counts;
  }
}

/** Similarity of one corpus document to a query. */
interface CosineSimilaritySearchResult<T> {
  readonly doc: CosineSimilarityDocument<T>;
  /** Cosine similarity between the query and `doc`. */
  readonly result: number;
}

/** Outcome of one `CosineSimilarityController.search` call. */
interface CosineSimilaritySearchResults<T> {
  /** Term counts of the query. */
  readonly tf: Vector;
  /** Snapshot of the IDF weights, ordered ascending by term. */
  readonly weights: number[];
  /** One entry per corpus document, sorted ascending by `result`. */
  readonly results: CosineSimilaritySearchResult<T>[];
}

/**
 * Builds TF-IDF vectors for a fixed corpus and scores queries against every
 * document by cosine similarity.
 */
class CosineSimilarityController<T> {
  readonly vocab: Vocabulary;
  readonly idf: InverseDocumentFrequency;
  readonly docs: CosineSimilarityDocument<T>[];
  readonly toStringArray: (doc: T) => string[];
  private _searchCache: Map<T, CosineSimilaritySearchResults<T>>;

  /**
   * @param docs Corpus payloads; each one is wrapped in a `CosineSimilarityDocument`.
   * @param toStringArray Converts a payload (or a query) into its list of terms.
   */
  constructor(docs: T[], toStringArray: (doc: T) => string[]) {
    this.toStringArray = toStringArray;
    this.vocab = new Set<string>();
    // Pre-sized so the IDF table sees the final corpus size from the start.
    this.docs = new Array<CosineSimilarityDocument<T>>(docs.length);
    this.idf = new InverseDocumentFrequency(this.docs, this.vocab);
    this._searchCache = new Map();
    docs.forEach((doc, idx) => {
      this.docs[idx] = new CosineSimilarityDocument<T>(doc, this.vocab, this.idf, toStringArray);
    });
  }

  /** Counts the occurrences of each term in the query. */
  private getSearchTF(query: T): Vector {
    const tf = Vector.withOrder(this.vocab);
    for (const term of this.toStringArray(query)) {
      tf.set(term, (tf.get(term) ?? 0) + 1);
    }
    return tf;
  }

  /**
   * Cosine similarity of two sparse vectors.
   *
   * @returns `a·b / (|a| |b|)`, or 0 when either vector has zero length.
   */
  private getCosine(a: Vector, b: Vector): number {
    let dotProduct = 0;
    let aSquares = 0;
    let bSquares = 0;
    for (const [term, aVal] of a) {
      aSquares += aVal * aVal;
      // Components are paired by term; a term missing from `b` contributes 0.
      dotProduct += aVal * (b.get(term) ?? 0);
    }
    for (const bVal of b.values()) {
      bSquares += bVal * bVal;
    }
    const denominator = Math.sqrt(aSquares) * Math.sqrt(bSquares);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Scores every corpus document against `query`.
   *
   * Results are cached per query using `Map` key equality (by value for
   * primitives, by identity for objects), and a repeated query returns the
   * same result object.
   *
   * @param query Query payload, converted to terms with `toStringArray`.
   * @returns The query term counts, the IDF weights, and the per-document
   *   similarities sorted ascending, so the best match is the last entry.
   */
  search(query: T): CosineSimilaritySearchResults<T> {
    const cached = this._searchCache.get(query);
    if (cached !== undefined) return cached;

    const tf = this.getSearchTF(query);
    const tfidf = Vector.withOrder(this.vocab);
    for (const [term, count] of tf) {
      // Terms unknown to the corpus get weight 0 and cannot match any document.
      tfidf.set(term, count * this.idf.weightOf(term));
    }

    const results = this.docs
      .map((doc): CosineSimilaritySearchResult<T> => ({
        doc,
        result: this.getCosine(tfidf, doc.vector),
      }))
      .sort((a, b) => a.result - b.result);

    const result: CosineSimilaritySearchResults<T> = {
      tf,
      weights: Array.from(this.idf.weights),
      results,
    };
    this._searchCache.set(query, result);
    return result;
  }
}
