Skip to content

Instantly share code, notes, and snippets.

@daniellizik
Created November 4, 2016 19:54
Show Gist options
  • Save daniellizik/85cf5a3d35802018255f7471db867072 to your computer and use it in GitHub Desktop.
Save daniellizik/85cf5a3d35802018255f7471db867072 to your computer and use it in GitHub Desktop.
scraper
'use strict';
const request = require('request-promise');
const cheerio = require('cheerio');
/**
 * Fetches a page whose URL is built from a template, passes the parsed
 * document to a caller-supplied action, and lets a caller-supplied reducer
 * compute the next set of URL parameters.
 */
class Scraper {
  /**
   * Stores the options and immediately starts the first fetch.
   *
   * @param {Object} opts
   * @param {string} opts.schema - URL template containing `:name` placeholders.
   * @param {Object} opts.params - Map of placeholder name to an object with a
   *   `curr` value and an optional `format` function.
   * @param {Function} opts.action - Called as `action(url, $)` where `$` is the
   *   value returned by `cheerio.load`.
   * @param {Function} opts.reduce - Called as a method of this instance with the
   *   action's result; its return value replaces `this.params`.
   */
  constructor(opts) {
    this.params = opts.params;
    this.action = opts.action;
    this.reduce = opts.reduce;
    this.schema = opts.schema;
    this.init();
  }

  /**
   * Builds the request URL by substituting the first `:name` occurrence in the
   * schema for every key of `this.params`.
   *
   * A param's `format` function, when present, is called with `this` bound to
   * the scraper and its return value is used; otherwise `curr` is used.
   *
   * @returns {string} The schema with placeholders filled in.
   */
  makeUrl() {
    return Object.keys(this.params).reduce((url, key) => {
      const param = this.params[key];
      const value = param.format ? param.format.call(this) : param.curr;
      // A function replacer inserts the value literally; a replacement string
      // would have `$&`, `$$`, `$1`, ... expanded by String.prototype.replace.
      return url.replace(`:${key}`, () => String(value));
    }, this.schema);
  }

  /**
   * Requests the current URL, loads the response into cheerio, runs the action
   * and stores the reducer's output as the new params.
   *
   * @returns {Promise<Object|undefined>} Resolves with the new params, or with
   *   `undefined` after logging when any step fails (errors are not rethrown).
   */
  init() {
    const url = this.makeUrl();
    return request(url)
      .then(html => cheerio.load(html))
      .then($ => this.action(url, $))
      .then(res => {
        this.params = this.reduce(res);
        return this.params;
      })
      .catch(e => console.log(e));
  }
}
module.exports = Scraper;
'use strict';
const test = require('tape');
const Scraper = require('./scraper');
// Scraper instance pointed at a local fixture server. Constructing it starts
// the first request immediately (the Scraper constructor calls init()).
const scraper = new Scraper({
  schema: 'http://localhost:9999/:a/:b/:c',
  params: {
    a: {
      curr: 'fixtures'
    },
    b: {
      curr: 'sub-a'
    },
    c: {
      curr: 'fixture.html',
      // Called with `this` bound to the scraper; passes the current value
      // through unchanged.
      format: function() {
        return this.params.c.curr;
      }
    }
  },
  // Resolves with the text content of the element with id "bob".
  action: function(url, $) {
    return new Promise((resolve) => {
      resolve($('#bob').text())
    });
  },
  // Produces the next params: `a` is carried over as-is, `b` and `c` get a new
  // `curr`. Called as a method of the scraper, so `this.params` is the current set.
  // NOTE(review): with the string values above, `+ 1` appends "1" rather than
  // incrementing a number; confirm that is the intended progression.
  reduce: function(res) {
    const b = this.params.b;
    const c = this.params.c;
    return Object.assign({}, this.params, {
      b: Object.assign({}, b, {curr: b.curr + 1}),
      // Advance from c.curr, not from the param object itself (object + 1
      // would yield "[object Object]1").
      c: Object.assign({}, c, {curr: c.curr === '' ? 1 : c.curr + 1})
    });
  }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment