Last active
October 1, 2015 15:04
-
-
Save shbaik82/de33d33579df6ad1ae26 to your computer and use it in GitHub Desktop.
HTML table scraping code using cheerio.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// table scraping code
var fs = require('fs')
// , cheerio = require('cheerio')
  , _ = require('lodash');
// var html = fs.readFileSync('./target.html', 'utf-8');
// var $ = cheerio.load(html);
/**
 * Finds the smallest <table> under <body> whose inner HTML contains `string`.
 *
 * When tables are nested, every ancestor table of a match also contains the
 * string, so picking the table with the shortest inner HTML selects the
 * innermost one.
 *
 * @param {Function} html - A loaded cheerio instance (the `$` returned by
 *   `cheerio.load`), used both to query the document and to wrap elements.
 * @param {string} string - Text to look for inside each table's inner HTML.
 * @returns {Object|null} The cheerio-wrapped table with the shortest inner
 *   HTML among those containing `string` (the later table wins a tie), or
 *   `null` when no table contains it.
 */
var findTableWithString = function(html, string) {
  var shortestTable = null;
  var shortestLength = Infinity;

  html('body').find('table').each(function(index, element) {
    var table = html(element);
    // .html() may yield null for an empty selection; treat that as no markup.
    var markup = table.html() || '';

    if (markup.indexOf(string) === -1) {
      return;
    }

    // `<=` keeps the later table on equal lengths.
    if (markup.length <= shortestLength) {
      shortestLength = markup.length;
      shortestTable = table;
    }
  });

  return shortestTable;
};
// var shortestTable = findTableWithString($, '6-1-1');
// console.log(shortestTable.html());
exports.findTableWithString = findTableWithString;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment