Skip to content

Instantly share code, notes, and snippets.

@Erushenko
Last active February 14, 2019 07:49
Show Gist options
  • Save Erushenko/6641ab1a790237209e2e7cd131278a97 to your computer and use it in GitHub Desktop.
Save Erushenko/6641ab1a790237209e2e7cd131278a97 to your computer and use it in GitHub Desktop.
#bluebird #promise #parser
const cheerio = require("cheerio")
const Promise = require("bluebird")
const request = Promise.promisify(require("request"))
const csvStringify = require('csv-stringify')
const fs = require('fs')
// Forum index page the scrape starts from.
const URL = 'https://dou.ua/forums'

// Fetches one topic page and merges what topicContents reads from it into the topic.
const loadTopicDetails = topic =>
  request(topic.url).then(res => topicContents(res, topic))

// Flattens a topic object into its property values, one array per CSV row.
const toCsvRow = item => Object.values(item)

// Pipeline: fetch the index, extract the topics, visit each topic through
// bluebird's mapSeries, convert to rows and hand them to the CSV writer.
request(URL)
  .then(topics)
  .mapSeries(topic => loadTopicDetails(topic))
  .then(data => data.map(item => toCsvRow(item)))
  .then(writeFileCsv)
  .catch(err => {
    console.log("Error", err)
  })
/**
 * Builds the list of topics found on a fetched forum index page.
 *
 * Loads `res.body` with cheerio, selects the first link inside each article
 * heading under `div.b-forum-articles`, and turns every matched link into a
 * plain topic record.
 *
 * @param {Object} res - Response object; only `res.body` is read.
 * @returns {Array<{title: string, url: string, commentsCount: number}>}
 *   One record per matched link, with `commentsCount` initialised to 0.
 */
function topics(res) {
  const $ = cheerio.load(res.body)
  const links = $("div.b-forum-articles article > h2 > a:first-child").toArray()

  return links.map(({ attribs }) => ({
    title: attribs.title,
    url: attribs.href,
    commentsCount: 0
  }))
}
/**
 * Fills in a topic's comment count from its fetched topic page.
 *
 * Loads `res.body` with cheerio, reads the text of the `#lblCommentsCount`
 * element and parses it as a base-10 integer. When the text does not start
 * with a number (or the element is missing), the count falls back to 0.
 *
 * @param {Object} res - Response object; only `res.body` is read.
 * @param {Object} topic - Topic record; its `commentsCount` is overwritten in place.
 * @returns {Object} The same `topic` object that was passed in.
 */
function topicContents(res, topic) {
  const $ = cheerio.load(res.body)
  // Explicit radix: the counter is decimal text, and without it a
  // "0x"-prefixed string would be interpreted as hexadecimal.
  const commentsCount = Number.parseInt($('#lblCommentsCount').text(), 10)
  topic.commentsCount = Number.isNaN(commentsCount) ? 0 : commentsCount
  return topic
}
/**
 * Formats a date value as a local-time `YYYY-MM-DD` string.
 *
 * Standard-library replacement for the previous `moment(it).format('YYYY-MM-DD')`
 * call, which referenced a `moment` binding this file never requires.
 *
 * @param {Date|number|string} value - Anything accepted by the `Date` constructor.
 * @returns {string} The formatted date, or `'Invalid date'` when it cannot be parsed.
 */
function formatLocalDate(value) {
  const date = new Date(value)
  if (Number.isNaN(date.getTime())) {
    return 'Invalid date'
  }
  const pad = (part, width) => String(part).padStart(width, '0')
  return [
    pad(date.getFullYear(), 4),
    pad(date.getMonth() + 1, 2),
    pad(date.getDate(), 2),
  ].join('-')
}

/**
 * Serialises rows to `;`-delimited CSV and writes them to a timestamped file
 * under `./tmp` (name produced by `getNameFile('./tmp/some-parse')`).
 *
 * @param {Array} data - Records handed to csvStringify unchanged.
 * @returns {Promise<string>} Resolves with the written file name; rejects with
 *   the csvStringify error or the file-system error, so a calling promise
 *   chain's `.catch` can observe failures.
 */
function writeFileCsv(data) {
  return new Promise((resolve, reject) => {
    csvStringify(
      data,
      {
        delimiter: ';',
        header: true,
        // NOTE(review): newer csv-stringify releases appear to name this
        // option `cast` - confirm against the installed version.
        formatters: {
          date: it => formatLocalDate(it),
          bool: it => it ? 'X' : '',
        },
      },
      (stringifyErr, output) => {
        // Previously this error was ignored and `undefined` output was written.
        if (stringifyErr) {
          reject(stringifyErr)
          return
        }
        const fileName = getNameFile('./tmp/some-parse')
        // fs.writeFileSync takes no callback, so the old success/failure
        // handler never ran; the asynchronous variant actually invokes it.
        fs.writeFile(fileName, output, writeErr => {
          if (writeErr) {
            console.log('Some error occurred - file either not saved or corrupted file saved.', writeErr)
            reject(writeErr)
            return
          }
          console.log('It\'s saved!')
          resolve(fileName)
        })
      }
    )
  })
}
/**
 * Builds a timestamped CSV file name: `<prefix>-<year>-<month>-<day>-<hour>.csv`.
 *
 * All parts come from a single Date snapshot; reading the clock separately
 * for each part could mix values from either side of an hour or day boundary.
 * Parts are local-time values and are not zero-padded.
 *
 * @param {string} prefix - Path and base name placed before the timestamp.
 * @param {Date} [now=new Date()] - Moment to stamp the name with; defaults to
 *   the current time.
 * @returns {string} The file name including the `.csv` extension.
 */
function getNameFile(prefix, now = new Date()) {
  return [
    prefix,
    now.getFullYear(),
    now.getMonth() + 1, // getMonth() is zero-based
    now.getDate(),
    now.getHours(),
  ].join('-') + '.csv'
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment