Last active
April 7, 2017 11:05
-
-
Save sgnl/b2cdc3888dc1cd020392da816a87eda0 to your computer and use it in GitHub Desktop.
Scrape website's flash card data and output HTML for markdown use
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const got = require('got')
const cheerio = require('cheerio')
// Bluebird deliberately shadows the built-in Promise: the crawl loop below
// uses its `Promise.delay` and `.spread` helpers.
const Promise = require('bluebird')
// promisifyAll adds `*Async` variants (e.g. `appendFileAsync`) that return promises.
const fs = Promise.promisifyAll(require('fs'))

// Root URL of the flash-card deck; the fetch helpers append `/<page>` and `/<page>/back`.
const baseUrl = 'https://[nope].com/comptia-a-exam/flashcards/902-windows-operating-systems/pages'
/**
 * Fetch one flash-card page and extract the text shown on the front of the card.
 *
 * @param {string} baseUrl - URL prefix; the request goes to `${baseUrl}/${startPageNumber}`.
 * @param {number} [startPageNumber=1] - Page number to request.
 * @returns {Promise<string>} Combined text of the elements matching `.front p`.
 * @throws {Error} Rejects with 'no card front found, bail!' when that text is empty.
 */
function getFrontCard(baseUrl, startPageNumber = 1) {
  return got(`${baseUrl}/${startPageNumber}`).then((res) => {
    const $ = cheerio.load(res.body)
    const front = $('.front p').text()

    // Empty text means the page did not contain a card front.
    if (!front) throw new Error('no card front found, bail!')

    return front
  })
}
/**
 * Fetch the back side of one flash card and collect the text of each paragraph.
 *
 * @param {string} baseUrl - URL prefix; the request goes to `${baseUrl}/${startPageNumber}/back`.
 * @param {number} [startPageNumber=1] - Page number to request.
 * @returns {Promise<string[]>} Text of every element matching `.back p`, in document order.
 * @throws {Error} Rejects with 'no card back found, bail!' when there is no
 *   matching paragraph or the first one has no text.
 */
function getBackCard(baseUrl, startPageNumber = 1) {
  return got(`${baseUrl}/${startPageNumber}/back`).then((res) => {
    const $ = cheerio.load(res.body)
    const backInformation = []

    $('.back p').each((i, el) => {
      backInformation[i] = $(el).text()
    })

    // Validate after the loop: a check placed inside the `.each` callback
    // never runs when the selector matches nothing.
    if (!backInformation[0]) throw new Error('no card back found, bail!')

    return backInformation
  })
}
/**
 * Render one flash card as a collapsible `<details>` HTML snippet.
 *
 * The inputs are plain text, so `&`, `<` and `>` are escaped before being
 * placed in the markup; a missing value renders as empty text.
 *
 * @param {string} front - Text for the `<summary>` line.
 * @param {string[]} [back=[]] - `back[0]` becomes the `<h5>` heading and
 *   `back[1]` the `<p>` body; further entries are not rendered.
 * @returns {string} The snippet, starting and ending with a newline.
 */
function buildCard(front, back = []) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' }
  const escapeHtml = (value) =>
    (value == null ? '' : String(value)).replace(/[&<>]/g, (ch) => entities[ch])

  return `
<details>
<summary><b>${escapeHtml(front)}</b></summary>
<h5>${escapeHtml(back[0])}</h5>
<p>${escapeHtml(back[1])}</p>
</details>
`
}
/**
 * Append one rendered card to the output file.
 *
 * @param {string} card - Text to append.
 * @param {string} [filePath='902-windows-operating-systems'] - Destination
 *   file; defaults to the path this script has always written to.
 * @returns {Promise} Whatever `fs.appendFileAsync` returns for the write.
 */
function writeToFile(card, filePath = '902-windows-operating-systems') {
  return fs.appendFileAsync(filePath, card)
}
/**
 * Scrape one card (front and back), append it to the output file, then recurse
 * into the following page.
 *
 * The recursion has no success exit: the returned promise settles only by
 * rejecting, when a fetch, parse or write step for some page fails.
 *
 * @param {string} baseUrl - URL prefix handed to the front/back fetchers.
 * @param {number} [pageNum=1] - Page to process on this step.
 * @param {number} [delayMs=500] - Pause before each page's requests, in milliseconds.
 * @returns {Promise} Chain covering this page and every later one.
 */
function getNextPage(baseUrl, pageNum = 1, delayMs = 500) {
  console.log('fetching Page: ', pageNum)

  return Promise.delay(delayMs)
    // Both requests start together; `.spread` waits for the pair and passes
    // them to buildCard as (front, back).
    .then(() => [getFrontCard(baseUrl, pageNum), getBackCard(baseUrl, pageNum)])
    .spread(buildCard)
    .then(writeToFile)
    .then(() => getNextPage(baseUrl, pageNum + 1, delayMs))
}
// Start the crawl at page 1; log completion, or whatever error ended the chain.
getNextPage(baseUrl, 1)
  .then(() => console.log('done'))
  .catch((err) => {
    console.error(err)
  })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment