Skip to content

Instantly share code, notes, and snippets.

@sgnl
Last active April 7, 2017 11:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sgnl/b2cdc3888dc1cd020392da816a87eda0 to your computer and use it in GitHub Desktop.
Save sgnl/b2cdc3888dc1cd020392da816a87eda0 to your computer and use it in GitHub Desktop.
Scrape website's flash card data and output HTML for markdown use
const got = require('got')
const cheerio = require('cheerio')
const Promise = require('bluebird')
const fs = Promise.promisifyAll(require('fs'))
// Root URL of the flashcard deck. getFrontCard appends `/<page>` and
// getBackCard appends `/<page>/back` to it.
// NOTE(review): the `[nope]` host looks like a redacted placeholder — confirm the real host before running.
const baseUrl = 'https://[nope].com/comptia-a-exam/flashcards/902-windows-operating-systems/pages'
/**
 * Fetch one flashcard page and return the text of its front side.
 *
 * @param {string} baseUrl - URL prefix; the page number is appended as a path segment.
 * @param {number} [startPageNumber=1] - Page to request.
 * @returns {Promise<string>} Resolves with the text of the `.front p` selection.
 *   Rejects with an Error when that text is empty.
 */
function getFrontCard(baseUrl, startPageNumber = 1) {
  const pageUrl = `${baseUrl}/${startPageNumber}`

  // Pull the front text out of the response body, bailing if there is none.
  const extractFront = res => {
    const $ = cheerio.load(res.body)
    const frontText = $('.front p').text()
    if (frontText) return frontText
    throw Error('no card front found, bail!')
  }

  return got(pageUrl).then(extractFront)
}
/**
 * Fetch the back side of one flashcard page and return its paragraphs.
 *
 * @param {string} baseUrl - URL prefix; `/<page>/back` is appended to it.
 * @param {number} [startPageNumber=1] - Page to request.
 * @returns {Promise<string[]>} Resolves with the text of each `.back p`
 *   element, in document order.
 *   Rejects with an Error when there is no first paragraph or it is empty.
 */
function getBackCard(baseUrl, startPageNumber = 1) {
  return got(`${baseUrl}/${startPageNumber}/back`)
    .then(res => {
      const $ = cheerio.load(res.body)
      const backInformation = []
      $('.back p').each((i, el) => {
        backInformation[i] = $(el).text()
      })
      // Validate after the loop: a check inside the callback never runs when
      // the selector matches nothing, which let an empty card slip through.
      if (!backInformation[0]) throw new Error('no card back found, bail!')
      return backInformation
    })
}
/**
 * Render one flashcard as a collapsible `<details>` HTML snippet.
 *
 * @param {string} front - Text shown in the `<summary>` line.
 * @param {string[]} [back=[]] - Back-side paragraphs; index 0 goes into the
 *   `<h5>` and index 1 into the `<p>`. Further entries are not rendered.
 * @returns {string} The HTML snippet, with a leading and trailing newline.
 */
function buildCard(front, back = []) {
  // The inputs are plain text, so markup characters must be escaped or they
  // would be interpreted as HTML in the generated file.
  const escapeHtml = value => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
  // A missing part renders as empty instead of the literal text "undefined".
  const cell = value => (value == null ? '' : escapeHtml(value))

  return `
<details>
<summary><b>${cell(front)}</b></summary>
<h5>${cell(back[0])}</h5>
<p>${cell(back[1])}</p>
</details>
`
}
/**
 * Append one rendered card to the output file.
 *
 * @param {string} card - Text to append.
 * @param {string} [filePath='902-windows-operating-systems'] - File to append
 *   to; defaults to the path this script has always written.
 * @returns {Promise} The promise returned by `fs.appendFileAsync`.
 */
function writeToFile(card, filePath = '902-windows-operating-systems') {
  return fs.appendFileAsync(filePath, card)
}
/**
 * Scrape one page, append its card to the output file, then recurse on the
 * next page number.
 *
 * There is no explicit stop condition here: the chain keeps requesting pages
 * until one of its steps rejects, so the returned promise only ever settles
 * by rejection.
 *
 * @param {string} baseUrl - URL prefix handed to getFrontCard / getBackCard.
 * @param {number} [pageNum=1] - Page to process in this step.
 * @returns {Promise} Promise for the remainder of the scrape.
 */
function getNextPage(baseUrl, pageNum = 1) {
  console.log('fetching Page: ', pageNum);

  // Both sides are requested together; `.spread` hands the two results to
  // buildCard as separate arguments.
  const fetchBothSides = () => [
    getFrontCard(baseUrl, pageNum),
    getBackCard(baseUrl, pageNum)
  ]
  const fetchFollowingPage = () => getNextPage(baseUrl, pageNum + 1)

  // Pause 500 ms before each page's requests.
  return Promise.delay(500)
    .then(fetchBothSides)
    .spread(buildCard)
    .then(writeToFile)
    .then(fetchFollowingPage)
}
// Entry point: start scraping at page 1. 'done' is logged if the chain
// fulfils; any rejection is printed to stderr.
getNextPage(baseUrl, 1)
  .then(() => {
    console.log('done')
  })
  .catch(error => console.error(error))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment