Skip to content

Instantly share code, notes, and snippets.

@shbaik82
Last active October 1, 2015 15:04
Show Gist options
  • Save shbaik82/de33d33579df6ad1ae26 to your computer and use it in GitHub Desktop.
Save shbaik82/de33d33579df6ad1ae26 to your computer and use it in GitHub Desktop.
HTML table scraping code using cheerio.js
// table scraping code
var fs = require('fs')
// , cheerio = require('cheerio')
, _ = require('lodash');
// var html = fs.readFileSync('./target.html', 'utf-8');
// var $ = cheerio.load(html);
/**
 * Finds the smallest <table> under <body> whose inner HTML contains `string`.
 *
 * When tables are nested, every ancestor table of the cell holding the text
 * also contains it; picking the candidate with the shortest markup selects
 * the innermost one.
 *
 * @param {Function} html - cheerio root function for the loaded document
 *     (the `$` returned by `cheerio.load(...)`).
 * @param {string} string - literal text to search for in each table's markup.
 * @returns {Object|null} cheerio object wrapping the matching table, or
 *     `null` when no table contains `string`.
 */
var findTableWithString = function(html, string) {
  var candidates = html('body').find('table').toArray()
    .map(function(element) {
      var node = html(element);
      // Serialise each table once; the markup is used for both the
      // containment check and the size comparison.
      return {markup: node.html(), node: node};
    })
    .filter(function(candidate) {
      return candidate.markup.indexOf(string) !== -1;
    });

  // No match: report it explicitly instead of failing on `undefined.node`.
  if (candidates.length === 0) {
    return null;
  }

  // On equal lengths the later table wins, so only a strictly shorter
  // earlier candidate is kept.
  var shortest = candidates.reduce(function(best, candidate) {
    return best.markup.length < candidate.markup.length ? best : candidate;
  });
  return shortest.node;
};
// var shortestTable = findTableWithString($, '6-1-1');
// console.log(shortestTable.html());
// Public API: expose findTableWithString on this module's CommonJS exports.
exports.findTableWithString = findTableWithString;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment