Created
January 17, 2022 07:28
-
-
Save fredriccliver/16f4b66e9ebd777ffb1841ed84081cad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// e.g.
// node index.js https://google.com
import { readSync } from "to-vfile"; | |
import { toString } from "nlcst-to-string"; | |
import { retext } from "retext"; | |
import retextPos from "retext-pos"; | |
import retextKeywords from "retext-keywords"; | |
import fetch from "node-fetch"; | |
import { JSDOM } from "jsdom"; | |
// URL of the page to analyse, taken from the first command-line argument.
const targetUrl = process.argv[2];

if (!targetUrl) {
  // Without a URL there is nothing to fetch; show usage instead of letting
  // fetch() reject with an opaque error.
  console.error("Usage: node index.js <url>");
  process.exitCode = 1;
} else {
  fetch(targetUrl)
    .then((response) => {
      // Fail on HTTP error statuses so an error page is not analysed as content.
      if (!response.ok) {
        throw new Error(
          `Request failed: ${response.status} ${response.statusText}`
        );
      }
      return response.text();
    })
    .then((html) => {
      const dom = new JSDOM(html);
      // Only the text inside <p> elements is analysed, joined with spaces.
      const textContent = Array.from(
        dom.window.document.querySelectorAll("p"),
        (paragraph) => paragraph.textContent
      ).join(" ");
      // Returning the result keeps any promise from extractKeywords inside
      // this chain so that its failures reach the catch handler below.
      return extractKeywords(textContent);
    })
    .catch((err) => {
      console.error(`Failed to extract keywords from ${targetUrl}:`, err);
      process.exitCode = 1;
    });
}
/**
 * Runs the retext keyword pipeline over a text and prints the results.
 *
 * Prints a "Keywords:" section followed by a "Key-phrases:" section to
 * stdout. Keywords with a score below 0.4 are skipped; key-phrases with a
 * score below 0.01 or a weight below 10 are skipped.
 *
 * @param {string} p - The plain text to analyse.
 * @returns {Promise<void>} Settles once processing and printing are done;
 *   rejects if the retext pipeline fails, so callers can handle the error.
 */
async function extractKeywords(p) {
  // Truncates (does not round) a score to two decimal places for display.
  const truncateScore = (score) => Math.floor(score * 100) / 100;

  const file = await retext()
    .use(retextPos) // Make sure to use `retext-pos` before `retext-keywords`.
    .use(retextKeywords, { maximum: 10 })
    .process(p);

  console.log("Keywords:");
  for (const keyword of file.data.keywords) {
    if (keyword.score < 0.4) continue;
    // The first match is used as the display form of the keyword.
    const label = toString(keyword.matches[0].node);
    console.log(
      `${label} (SCORE:${truncateScore(keyword.score)} WEIGHT:${keyword.weight})`
    );
  }

  console.log("Key-phrases:");
  for (const phrase of file.data.keyphrases) {
    if (phrase.score < 0.01 || phrase.weight < 10) continue;
    // A phrase match spans several nodes; concatenate their text in order.
    const label = phrase.matches[0].nodes.map((node) => toString(node)).join("");
    console.log(
      `${label} (SCORE:${truncateScore(phrase.score)} WEIGHT:${phrase.weight})`
    );
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment