Skip to content

Instantly share code, notes, and snippets.

@lakefox
Created June 8, 2023 23:40
Show Gist options
  • Save lakefox/f67e5c34ddbcafde2a4c5f030db55a70 to your computer and use it in GitHub Desktop.
Save lakefox/f67e5c34ddbcafde2a4c5f030db55a70 to your computer and use it in GitHub Desktop.
Paraphrasing Text without Machine Learning in JavaScript: full tutorial @ decode.sh
/**
 * Extractive summariser: scores every sentence of `text` by how often its
 * non-stop-words occur across the whole text and returns the best-scoring
 * sentences, highest score first.
 *
 * Pipeline:
 *   1. Each "\n"-separated line is a paragraph; `breakSentence` splits it
 *      into sentences.
 *   2. Each sentence is split on single spaces and stop words are removed.
 *      Matching is exact: tokens are not lower-cased and punctuation is not
 *      stripped, so "The" or "the," are NOT treated as stop words.
 *   3. A sentence's `frequency` is the sum of the document-wide occurrence
 *      counts of its words, divided by its number of words.
 *   4. Sentences are sorted by descending frequency and cut where the mean
 *      of the remaining scores falls more than 0.4 below the overall mean.
 *
 * Sentences that have no significant words (blank lines, stop-word-only
 * sentences) are skipped. They used to get a 0/0 = NaN score, which made the
 * overall mean NaN and forced an empty result for the entire document.
 *
 * @param {string} text - Document to summarise; "\n" separates paragraphs.
 * @returns {Array<{sentence: string, words: string[], index: number,
 *   paragraph: number, frequency: number}>} Selected sentences. `index` is
 *   the sentence's position inside its paragraph and `paragraph` is the
 *   paragraph's position in `text`. May be empty.
 */
function smmry(text) {
  // Entries are kept exactly as in the original list (including the odd
  // spellings "amoungst", "fify" and "thickv"); duplicates were dropped since
  // a Set ignores them anyway. "" filters the empty tokens left by repeated
  // spaces.
  const stopWords = new Set([
    "", "a", "about", "above", "across", "after", "afterwards", "again",
    "against", "all", "almost", "alone", "along", "already", "also",
    "although", "always", "am", "among", "amongst", "amoungst", "amount",
    "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway",
    "anywhere", "are", "around", "as", "at", "back", "be", "became",
    "because", "become", "becomes", "becoming", "been", "before",
    "beforehand", "behind", "being", "below", "beside", "besides", "between",
    "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot",
    "cant", "co", "con", "could", "couldnt", "cry", "de", "describe",
    "detail", "do", "done", "down", "due", "during", "each", "eg", "eight",
    "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even",
    "ever", "every", "everyone", "everything", "everywhere", "except", "few",
    "fifteen", "fify", "fill", "find", "fire", "first", "five", "for",
    "former", "formerly", "forty", "found", "four", "from", "front", "full",
    "further", "get", "give", "go", "had", "has", "hasnt", "have", "he",
    "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon",
    "hers", "herself", "him", "himself", "his", "how", "however", "hundred",
    "ie", "if", "in", "inc", "indeed", "interest", "into", "is", "it", "its",
    "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd",
    "made", "many", "may", "me", "meanwhile", "might", "mill", "mine", "more",
    "moreover", "most", "mostly", "move", "much", "must", "my", "myself",
    "name", "namely", "neither", "never", "nevertheless", "next", "nine",
    "no", "nobody", "none", "noone", "nor", "not", "nothing", "now",
    "nowhere", "of", "off", "often", "on", "once", "one", "only", "onto",
    "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out",
    "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re",
    "same", "see", "seem", "seemed", "seeming", "seems", "serious", "several",
    "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so",
    "some", "somehow", "someone", "something", "sometime", "sometimes",
    "somewhere", "still", "such", "system", "take", "ten", "than", "that",
    "the", "their", "them", "themselves", "then", "thence", "there",
    "thereafter", "thereby", "therefore", "therein", "thereupon", "these",
    "they", "thickv", "thin", "third", "this", "those", "though", "three",
    "through", "throughout", "thru", "thus", "to", "together", "too", "top",
    "toward", "towards", "twelve", "twenty", "two", "un", "under", "until",
    "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what",
    "whatever", "when", "whence", "whenever", "where", "whereafter",
    "whereas", "whereby", "wherein", "whereupon", "wherever", "whether",
    "which", "while", "whither", "who", "whoever", "whole", "whom", "whose",
    "why", "will", "with", "within", "without", "would", "yet", "you", "your",
    "yours", "yourself", "yourselves",
  ]);

  // Index the scorable sentences of the document.
  const doc = [];
  text.split("\n").forEach((paragraphText, paragraphIndex) => {
    breakSentence(paragraphText).forEach((sentence, index) => {
      const words = sentence.split(" ").filter((word) => !stopWords.has(word));
      // No significant words means the score would be 0/0 (NaN); skip it so
      // it cannot poison the averages computed below.
      if (words.length === 0) return;
      doc.push({ sentence, words, index, paragraph: paragraphIndex });
    });
  });

  // Nothing to rank (empty text, or stop words only).
  if (doc.length === 0) return [];

  // Tally every word once. A Map (not a plain object) keeps words such as
  // "constructor" from colliding with inherited properties.
  const occurrences = new Map();
  for (const { words } of doc) {
    for (const word of words) {
      occurrences.set(word, (occurrences.get(word) ?? 0) + 1);
    }
  }

  // Score = mean document-wide occurrence count of the sentence's words.
  for (const item of doc) {
    const total = item.words.reduce(
      (sum, word) => sum + occurrences.get(word),
      0
    );
    item.frequency = total / item.words.length;
  }

  // Highest score first; Array.prototype.sort is stable, so ties keep
  // document order.
  doc.sort((a, b) => b.frequency - a.frequency);

  // Find the first position whose tail mean is more than 0.4 below the
  // overall mean; everything from there on is considered low-value.
  const scores = doc.map((item) => item.frequency);
  const overallMean = average(scores);
  let slicePoint = 0;
  for (let i = 0; i < scores.length; i++) {
    if (overallMean - average(scores.slice(i)) > 0.4) {
      slicePoint = i;
      break;
    }
  }

  // NOTE(review): the slice starts at 1, so the single highest-scoring
  // sentence is always dropped, and the result is empty when no cut-off is
  // found (slicePoint stays 0). Kept as-is because callers may rely on it;
  // confirm whether slice(0, slicePoint) was intended.
  return doc.slice(1, slicePoint);
}
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} array - Values to average.
 * @returns {number} The mean, or NaN when `array` is empty (the mean of
 *   nothing is undefined; previously this case threw a TypeError because
 *   `reduce` was called without an initial value).
 */
function average(array) {
  const total = array.reduce((sum, value) => sum + value, 0);
  return total / array.length;
}
/**
 * Splits a paragraph into sentences.
 *
 * Steps:
 *   1. Bracketed numeric citation markers such as "[12]" are removed.
 *   2. Every run of whitespace is collapsed to a single space. This happens
 *      after step 1 so that removing " [12] " cannot leave a double space
 *      that would hide a sentence boundary.
 *   3. The text is split at each space that follows ".", "?" or "!" and
 *      precedes a letter, unless the punctuation directly follows one of the
 *      honorifics Mr, Mrs, Ms, Dr or Sr. The space itself is dropped.
 *
 * The pattern is case-insensitive, so the honorifics match in any case and
 * the letter that starts the next sentence may be lower-case.
 *
 * @param {string} sentences - Paragraph text.
 * @returns {string[]} The sentences in order; always at least one element
 *   (the whole input when no boundary is found).
 */
function breakSentence(sentences) {
  // \b makes the honorific a whole word: without it any word ending in
  // "ms", "mr", "dr" or "sr" (e.g. "problems.") blocked the split.
  // Splitting directly on the boundary also avoids routing through a
  // "{break}" placeholder, which mis-split text containing that literal.
  const sentenceBoundary = /(?<=(?<!\b(?:Mr|Mrs|Ms|Dr|Sr))[.?!]) (?=[A-Z])/i;

  return sentences
    .replace(/\[[0-9]+\]/g, "")
    .replace(/\s+/g, " ")
    .split(sentenceBoundary);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment