Skip to content

Instantly share code, notes, and snippets.

@PreyMa

PreyMa/index.js

Created Jan 29, 2021
Embed
What would you like to do?
Prototype of the g/psad fuzzy search algorithm in (poor) JS
/*
* This file is part of an article written by PreyMa on egimoto.com
* Come and visit us:
* https://www.egimoto.com/a/db11b2894ec52474
*/
// Global variables of the search index
const index= [];
const map= new Map();
// Create array of grams from all sanitized words
function toGrams( item ) {
const arr= [];
item.trim().match(/[\pL\w]+/g).forEach( word => {
word.split('').forEach((char, i, a) => {
if( i !== a.length-1 ) {
arr.push( (char+ a[i+1]).toLowerCase() );
}
});
});
return arr;
}
// Count all unique gram types and build index
function mapCount( grams ) {
const m= new Map();
grams.forEach( (gram, i) => {
let o= m.get( gram );
if( !o ) {
o= { count: 0, positions: [] };
m.set( gram, o );
}
o.count++;
o.positions.push( i );
});
return m;
}
// Build index from array of string
// The global variable 'data' is defined in the data.js file
data.forEach((item, i) => {
// Get grams
const grams= toGrams( item );
// Add to index map
grams.forEach( gram => {
let o= map.get( gram );
if( !o ) {
o= { count: 0, entries: [] };
map.set( gram, o );
}
o.count++;
o.entries.push( i );
});
// Count grams and add them to the index array
const m= mapCount( grams );
index.push({
string: item,
grams: m
});
});
// Build UI
document.addEventListener('DOMContentLoaded', () => {
const box= document.getElementById('inp');
const out= document.getElementById('out');
document.getElementById('go').addEventListener( 'click', () => {
performance.mark('start');
const grams= toGrams( box.value ); // Array of grams
const m= mapCount( grams ); // Map of counted grams
//console.log( grams );
// Create set of songs containing any of the query grams
const s= new Set();
grams.forEach( g => {
const e= map.get( g );
if( e ) {
e.entries.forEach( song => s.add(song) );
}
});
// Iterate songs
const a= [];
for( let idx of s ) {
// Get song map
const song= index[idx];
if( song ) {
let gsad= 0, psad= 0;
// Get position diff of first gram as offset
let posOffset= 0;
grams.some( g => {
if( song.grams.has(g) ) {
posOffset= song.grams.get(g).positions[0];
return true;
}
});
// Iterate grams
for( let [key, o] of m ) {
if( song.grams.has(key) ) {
const songGram= song.grams.get(key);
const songCnt= songGram.count;
const queryCnt= o.count;
// The song name is usually longer than the query
// Don't count too little occurances as an error
if( songCnt <= queryCnt ) {
gsad+= Math.abs( songCnt - queryCnt );
}
let ps= 1000000;
songGram.positions.forEach( p => {
o.positions.forEach( pp => {
ps= Math.min( ps, Math.abs( p- pp- posOffset ) ); //console.log( p, pp, ps );
});
});
psad+= ps;
} else {
// If the song name does not contain the gram count all as error
gsad+= o.count;
}
}
// Push to results
a.push({song, gsad, psad});
//console.log( gsad, psad, posOffset, song )
}
}
// Sort results first by gsad then by psad
a.sort( (a,b) => {
if( a.gsad === b.gsad ) {
return a.psad- b.psad;
}
return a.gsad- b.gsad;
});
// Calc run time (just out of curiousity)
performance.mark('end');
performance.measure('start', 'end');
const time= performance.getEntriesByType('measure')[0].duration*1000;
performance.clearMarks();
performance.clearMeasures();
// Build html for result table
let h= `<div>Time: ${time}</div>`;
a.forEach( o => {
h+= `<div>${o.gsad}/${o.psad}: ${o.song.string}</div>`;
});
out.innerHTML= h;
});
// Focus input box by default
box.focus();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment