import { getPos } from "./getPos.ts";
// Look up the parts of speech listed for the word and print the result.
const partsOfSpeech = await getPos("سڵاو");
console.log(partsOfSpeech); // Expected result: [ "noun" ]
Last active
June 13, 2022 08:05
-
-
Save roj1512/419cb13e2a9812245cc609b8821ccb07 to your computer and use it in GitHub Desktop.
Scrape VejînLex to find the parts of speech of a Kurdish word and return the results as an array
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.30-alpha/deno-dom-wasm.ts";
/**
 * Looks up a word on VejînLex and collects its part-of-speech tags.
 *
 * Fetches `https://lex.vejin.net/ck/search` with the word as the `t` query
 * parameter, reads the text of every `.wah > span.tag.pos` element in the
 * returned HTML, and breaks each label into individual tags.
 *
 * @param word The word to search for.
 * @returns The distinct tags, in order of first appearance. The array is
 * empty when the page has no matching elements or the parser returns nothing.
 */
export async function getPos(word: string): Promise<string[]> {
  // URLSearchParams percent-encodes the word, so non-ASCII input is safe.
  const query = new URLSearchParams({ t: word, f: "0", d: "3" });
  const response = await fetch(`https://lex.vejin.net/ck/search?${query}`);
  const page = new DOMParser().parseFromString(
    await response.text(),
    "text/html",
  );

  // The parser may yield nothing for unusable input; treat that as no matches.
  const labels = [...(page?.querySelectorAll(".wah > span.tag.pos") ?? [])]
    // deno-lint-ignore no-explicit-any
    .map((node: any): string => node.innerText);

  // A label can list several tags separated by commas, whitespace and the
  // word "and". Tokenise first and then drop the standalone conjunction, so
  // that every "and" is removed (not just the first one) and tags that merely
  // contain those letters are left intact.
  const tags = labels
    .flatMap((label) => label.split(/[\s,]+/))
    .filter((tag) => tag !== "" && tag !== "and");

  return [...new Set(tags)];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment