Created
October 13, 2023 06:25
-
-
Save EMPAT94/e73bfea9188cbb4d1b919ca56d93aa84 to your computer and use it in GitHub Desktop.
Deno Scraper for Royalroad.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * DESCRIPTION:
 * A small scraper that fetches the chapters of a story from RoyalRoad.com.
 * Give it the URL of a chapter and it pulls every chapter from there until the end!
 *
 * USAGE: deno run --allow-net --allow-write scrape.ts <https://www.royalroad.com/fiction/x/y/chapter/x/y> [text | html (default)]
 *
 * OUTPUT: story.[txt | html]
 */
import cheerio from "https://dev.jspm.io/cheerio"; | |
// Positional CLI arguments: the first chapter's URL, then an optional output format.
const [STARTING_CHAPTER_URL, requestedFormat] = Deno.args;

// Only the literal "text" selects plain-text output; anything else (or nothing) means HTML.
const FILE_FORMAT = requestedFormat === "text" ? "text" : "html";

// Output file written next to the current working directory.
const FILE_PATH = FILE_FORMAT === "text" ? "./story.txt" : "./story.html";
/**
 * Walks a story one chapter at a time: fetches a chapter page, yields the
 * contents of its `div.chapter-content` element, then follows the page's
 * "Next" button. Iteration ends when no "Next" anchor with an href is found.
 *
 * @param {string} [startUrl=STARTING_CHAPTER_URL] Absolute URL of the first
 *   chapter to fetch. Defaults to the URL given on the command line.
 * @yields {string} The chapter body, produced by the cheerio method named by
 *   FILE_FORMAT ("html" or "text").
 * @throws {Error} If a chapter page answers with a non-2xx HTTP status.
 */
async function* fetchChapter(startUrl = STARTING_CHAPTER_URL) {
  const SITE_ORIGIN = "https://www.royalroad.com";
  let nextChapterLink = startUrl;

  while (nextChapterLink) {
    console.log("Fetching", nextChapterLink);

    const res = await fetch(nextChapterLink);
    if (!res.ok) {
      // Discard the unread body so the connection is not left dangling.
      await res.body?.cancel();
      throw new Error(
        `Failed to fetch ${nextChapterLink}: HTTP ${res.status} ${res.statusText}`,
      );
    }

    const $ = cheerio.load(await res.text());

    // .html() returns null when the selector matches nothing; yielding that
    // would end up in the output as the literal text "null".
    const content = $("div.chapter-content")[FILE_FORMAT]();
    if (content == null) {
      console.warn("No chapter content found at", nextChapterLink);
    } else {
      yield content;
    }

    // Resolve against the site origin instead of concatenating strings, so
    // both root-relative and already-absolute hrefs produce a valid URL.
    const href = $('a.btn.btn-primary:contains("Next")').attr("href");
    nextChapterLink = href ? new URL(href, SITE_ORIGIN).href : undefined;
  }
}
/**
 * Entry point: streams every chapter produced by fetchChapter() into the
 * file at FILE_PATH, UTF-8 encoded, then logs "All Done!".
 *
 * The file is opened with `create` and `append`, so chapters are added to the
 * end of an existing file rather than replacing it.
 *
 * Prints a usage message and exits with status 1 when no chapter URL was
 * supplied on the command line.
 *
 * @returns {Promise<void>} Settles once all chapters are written and the file
 *   is closed; rejects with the first fetch or write error.
 */
async function main() {
  if (!STARTING_CHAPTER_URL) {
    console.error(
      "Usage: deno run --allow-net --allow-write scrape.ts <chapter-url> [text | html]",
    );
    Deno.exit(1);
  }

  const file = await Deno.open(FILE_PATH, {
    create: true,
    append: true,
  });
  const writer = file.writable.getWriter();
  const encoder = new TextEncoder();

  try {
    for await (const content of fetchChapter()) {
      // Await each write so backpressure is respected and a failed write
      // stops the scrape instead of becoming an unhandled rejection.
      await writer.write(encoder.encode(content));
    }
    await writer.close();
  } catch (err) {
    // Release the stream on failure. abort() may itself reject if the stream
    // has already errored; that secondary error is deliberately ignored so
    // the original failure is the one reported to the caller.
    await writer.abort(err).catch(() => {});
    throw err;
  }

  console.log("All Done!");
}
// Start the scraper. The rejection handler reports any failure and exits
// with a non-zero status instead of leaving the promise floating.
main().catch((err) => {
  console.error(err);
  Deno.exit(1);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment