Skip to content

Instantly share code, notes, and snippets.

@cou929
Created September 20, 2010 15:50
Show Gist options
  • Save cou929/588110 to your computer and use it in GitHub Desktop.
Save cou929/588110 to your computer and use it in GitHub Desktop.
Code snippet to scrape web site using jquery.
/**
* scrape.js
* Code snippet to scrape web site using jquery.
* This code is from WEB+DB Press Vol.51 JavaScript 今ドキ活用術 by Shinichi Tomita
*/
// Release the global `$` alias; the plugin below receives jQuery as a local `$`.
jQuery.noConflict();
(function($) {
  /**
   * Extract data from descendants of the current jQuery set, driven by a
   * declarative definition.
   *
   * @param {string|Object} def
   *   Either a selector string (shorthand for `{selector: def}`) or an object:
   *   - `selector`: passed to `.find()` on the current set.
   *   - `array`:    when truthy, every matched node is converted; otherwise
   *                 only the first match is.
   *   - `value`:    how a matched node becomes a value:
   *       function   -> called with the DOM node, its return value is used;
   *       object     -> each own key is scraped recursively (relative to the
   *                     matched node) using the key's value as a definition;
   *       '@name'    -> the node's `name` attribute;
   *       otherwise  -> the node's text content.
   * @returns {*}
   *   With `array`: the result of jQuery's `.map()` over the matches (a jQuery
   *   object, not a plain Array; per the jQuery `.map()` docs, null/undefined
   *   callback results are omitted, and `.get()` yields a plain array).
   *   Without `array`: the converted first match. When nothing matches, the
   *   built-in text/attribute converters return undefined, the record
   *   converter returns null, and a custom function is called with undefined.
   * @throws {TypeError} If `def` is null or undefined.
   */
  $.fn.scrape = function(def) {
    // Borrow hasOwnProperty so a definition that has its own `hasOwnProperty`
    // key (or no prototype at all) cannot break the own-key check.
    var hasOwn = Object.prototype.hasOwnProperty;

    // Text content of a DOM node; passes a missing node through unchanged.
    function getText(node) {
      return node && $(node).text();
    }

    // Build a converter that reads attribute `attrName` from a DOM node.
    function getAttr(attrName) {
      return function(node) {
        return node && $(node).attr(attrName);
      };
    }

    // Build a converter that assembles an object by scraping each own key of
    // `fields` relative to the given node.
    function getRecord(fields) {
      return function(node) {
        if (!node) {
          return null;
        }
        var record = {};
        for (var key in fields) {
          if (hasOwn.call(fields, key)) {
            record[key] = $(node).scrape(fields[key]);
          }
        }
        return record;
      };
    }

    if (def === null || def === undefined) {
      throw new TypeError(
        'scrape: definition must be a selector string or a definition object');
    }

    // Normalise into a local instead of reassigning the parameter.
    var spec = typeof def === 'string' ? {selector: def} : def;
    var value = spec.value;
    var type = typeof value;

    var toValue;
    if (type === 'function') {
      toValue = value;
    } else if (type === 'object' && value !== null) {
      // `typeof null` is 'object'; a null value must fall through to text
      // extraction rather than produce an empty record.
      toValue = getRecord(value);
    } else if (type === 'string' && value.indexOf('@') === 0) {
      toValue = getAttr(value.substring(1));
    } else {
      toValue = getText;
    }

    var matches = this.find(spec.selector);
    if (spec.array) {
      return matches.map(function() {
        return toValue(this);
      });
    }
    return toValue(matches.get(0));
  };
})(jQuery);
// ===========
// how to use
// ===========
//
// var record = jQuery(document).scrape({
// selector: '#foo > div.bar',
// array: true,
// value: {
// title: 'div:first > div.foo a',
// link: {
// selector: 'div:first > div.foo a',
// value: '@href'
// },
// price: {
// selector: 'span.price',
// value: function(n) {
// return parseInt(n.firstChild.data.replace(/[^\d]/g, ''), 10);
// }
// }
// }
// });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment