Skip to content

Instantly share code, notes, and snippets.

@B-R-P
Last active January 27, 2024 13:50
Show Gist options
  • Save B-R-P/5a6d6a98dc95440154b52600f579121c to your computer and use it in GitHub Desktop.
Save B-R-P/5a6d6a98dc95440154b52600f579121c to your computer and use it in GitHub Desktop.
Score each sentence in a text using gzip
import zlib from 'zlib';
import sbd from 'sbd';
/**
 * Standardises a list of numbers into z-scores using the population
 * standard deviation (the variance is divided by `arr.length`, not `length - 1`).
 *
 * @param {number[]} arr - Values to standardise.
 * @returns {number[]} A new array of the same length where each entry is
 *   `(value - mean) / stdDeviation`. Returns an empty array for empty input and
 *   an array of zeros when the standard deviation is exactly zero.
 */
function calculateZScore(arr) {
  if (arr.length === 0) {
    return [];
  }

  const mean = arr.reduce((sum, value) => sum + value, 0) / arr.length;
  const variance =
    arr.reduce((sum, value) => sum + (value - mean) ** 2, 0) / arr.length;
  const stdDeviation = Math.sqrt(variance);

  // With zero spread (one value, or all values equal) the division below would
  // be 0 / 0 and yield NaN for every entry; no value deviates from the mean,
  // so report a z-score of 0 instead.
  if (stdDeviation === 0) {
    return arr.map(() => 0);
  }

  return arr.map((value) => (value - mean) / stdDeviation);
}
/**
 * Scores every sentence of `text` by how similar it is, under gzip
 * compression, to the sentences of the text (itself included).
 *
 * For each pair (a, b) the similarity is
 * `1 - (gzip(a + b) - min(gzip(a), gzip(b))) / max(gzip(a), gzip(b))`,
 * where `gzip(x)` is the byte length of the gzipped, lower-cased string.
 * Each sentence's similarities are summed and the sums are then turned into
 * z-scores by `calculateZScore`.
 *
 * @param {string} text - Text to split with `sbd.sentences` (presumably into
 *   sentences — see the `sbd` package docs) and score.
 * @returns {Object<string, string>} Object mapping each z-score (as a property
 *   key) to its sentence. Sentences whose scores are equal share a key, so
 *   only the last of them is kept.
 */
function scoreSentence(text) {
  const sentences = sbd.sentences(text);
  const gzipLength = (str) => zlib.gzipSync(str.toLowerCase()).length;
  const sizes = sentences.map((sentence) => gzipLength(sentence));
  const totals = Array.from({ length: sentences.length }, () => 0);

  for (const [i, first] of sentences.entries()) {
    // The inner index starts at i, so each sentence is also paired with itself;
    // that self-similarity is added twice by the two updates below.
    for (let j = i; j < sizes.length; j += 1) {
      const jointSize = gzipLength(first + sentences[j]);
      const smaller = Math.min(sizes[i], sizes[j]);
      const larger = Math.max(sizes[i], sizes[j]);
      const similarity = 1 - (jointSize - smaller) / larger;
      totals[i] += similarity;
      totals[j] += similarity;
    }
  }

  const result = {};
  const zScores = calculateZScore(totals);
  for (const [index, zScore] of zScores.entries()) {
    result[zScore] = sentences[index];
  }
  return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment