Skip to content

Instantly share code, notes, and snippets.

@fredriccliver
Created January 17, 2022 07:28
Show Gist options
  • Save fredriccliver/16f4b66e9ebd777ffb1841ed84081cad to your computer and use it in GitHub Desktop.
Save fredriccliver/16f4b66e9ebd777ffb1841ed84081cad to your computer and use it in GitHub Desktop.
// Usage example:
// node index.js https://google.com
import { readSync } from "to-vfile";
import { toString } from "nlcst-to-string";
import { retext } from "retext";
import retextPos from "retext-pos";
import retextKeywords from "retext-keywords";
import fetch from "node-fetch";
import { JSDOM } from "jsdom";
// Entry point: fetch the page whose URL is given as the first command-line
// argument, collect the text of every <p> element, and pass the combined
// text to extractKeywords.
const targetUrl = process.argv[2];

if (!targetUrl) {
  // Without a URL there is nothing to fetch; print a usage hint instead of
  // letting fetch() fail on an undefined argument.
  console.error("Usage: node index.js <url>");
  process.exitCode = 1;
} else {
  fetch(targetUrl)
    .then((response) => {
      // A non-2xx response is usually an error page, not the article text,
      // so stop here rather than extracting keywords from it.
      if (!response.ok) {
        throw new Error(
          `Request failed: ${response.status} ${response.statusText}`
        );
      }
      return response.text();
    })
    .then((html) => {
      const dom = new JSDOM(html);
      // Join all paragraph texts with a space so they form a single document.
      const textContent = Array.from(
        dom.window.document.querySelectorAll("p"),
        (paragraph) => paragraph.textContent
      ).join(" ");
      // If extractKeywords returns a promise, returning it here routes its
      // rejection to the catch handler below instead of leaving it floating.
      return extractKeywords(textContent);
    })
    .catch((error) => {
      console.error(
        `Failed to extract keywords from ${targetUrl}: ${error.message}`
      );
      process.exitCode = 1;
    });
}
/**
 * Runs the given text through retext with the retext-pos and retext-keywords
 * plugins and prints the resulting keywords and key-phrases to stdout.
 *
 * Keywords with a score below 0.4 are skipped; key-phrases are skipped when
 * their score is below 0.01 or their weight is below 10.
 *
 * @param {string} p - Plain text to analyse.
 * @returns {Promise<void>} Settles once processing and printing are done;
 *   rejects if the retext pipeline fails, so callers can await or catch it.
 */
async function extractKeywords(p) {
  // Scores are printed truncated (not rounded) to two decimal places.
  const truncateScore = (score) => Math.floor(score * 100) / 100;

  const file = await retext()
    .use(retextPos) // Make sure to use `retext-pos` before `retext-keywords`.
    .use(retextKeywords, { maximum: 10 })
    .process(p);

  console.log("Keywords:");
  for (const keyword of file.data.keywords) {
    if (keyword.score < 0.4) continue;
    // Only the first match is used to render the keyword's text.
    const label = toString(keyword.matches[0].node);
    console.log(
      `${label} (SCORE:${truncateScore(keyword.score)} WEIGHT:${keyword.weight})`
    );
  }

  console.log("Key-phrases:");
  for (const phrase of file.data.keyphrases) {
    if (phrase.score < 0.01 || phrase.weight < 10) continue;
    // Rebuild the phrase text from the nodes of its first match.
    const label = phrase.matches[0].nodes.map((node) => toString(node)).join("");
    console.log(
      `${label} (SCORE:${truncateScore(phrase.score)} WEIGHT:${phrase.weight})`
    );
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment