Skip to content

Instantly share code, notes, and snippets.

@kaz
Created November 29, 2019 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaz/1a98d932539937166ab1a1c8ca53657f to your computer and use it in GitHub Desktop.
Save kaz/1a98d932539937166ab1a1c8ca53657f to your computer and use it in GitHub Desktop.
AWS exercise scraper
"use strict";
const iconv = require("iconv-lite");
const fetch = require("node-fetch");
const cheerio = require("cheerio");
// Highest exam page number requested (exam1.html … exam246.html).
const LAST_EXAM_NUMBER = 246;

// Each exam number has two pages: the page itself and its "-answer" companion.
const PAGE_SUFFIXES = ["", "-answer"];

/**
 * Downloads one page and returns the inner HTML of its <body>.
 *
 * The response bytes are decoded as Shift_JIS before parsing. The HTTP status
 * and URL are always logged to stderr so progress stays visible while stdout
 * carries the generated document.
 *
 * @param {string} url - Absolute URL of the page to download.
 * @returns {Promise<string|null>} The body markup, or `null` when the server
 *   answered with a non-2xx status (so error pages are not mistaken for content).
 */
async function fetchPageBody(url) {
	const resp = await fetch(url);
	console.error(resp.status, url);
	if (!resp.ok) {
		return null;
	}
	const content = iconv.decode(await resp.buffer(), "Shift_JIS");
	// decodeEntities: false — presumably so the Japanese text is emitted as-is
	// rather than rewritten as numeric character references.
	const $ = cheerio.load(content, {decodeEntities: false});
	return $("body").html();
}

/**
 * Fetches every exam and answer page in order and prints a single UTF-8 HTML
 * document containing all of them to stdout.
 *
 * Pages that cannot be retrieved (non-2xx status) are skipped. Any other
 * failure (network error, decoding error) is reported on stderr and the
 * process exits with a non-zero code instead of an unhandled rejection.
 */
(async () => {
	const contents = [];
	// Requests are intentionally sequential: one page at a time keeps the
	// output ordered and avoids issuing hundreds of requests in parallel.
	for (let i = 1; i <= LAST_EXAM_NUMBER; i++) {
		for (const suffix of PAGE_SUFFIXES) {
			const url = `https://awsjp.com/exam/saa-sap/c/exam${i}${suffix}.html`;
			const body = await fetchPageBody(url);
			if (body === null) {
				continue;
			}
			contents.push(`<br><br><hr><h2>Exam ${i}${suffix}</h2><hr><br><br>${body}`);
		}
	}
	// <meta> is a void element and <head> must be closed before <body> starts.
	console.log(`<html lang="ja"><head><meta charset="utf-8"></head><body>${contents.join("")}</body></html>`);
})().catch((err) => {
	console.error(err);
	process.exitCode = 1;
});
{
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"iconv-lite": "^0.5.0",
"node-fetch": "^2.6.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment