Skip to content

Instantly share code, notes, and snippets.

@kkamkou
Created October 24, 2013 12:34
Show Gist options
  • Save kkamkou/7136409 to your computer and use it in GitHub Desktop.
Save kkamkou/7136409 to your computer and use it in GitHub Desktop.
Gets microdata from HTML
// required stuff
var _ = require('lodash'),
cheerio = require('cheerio');
// exporting outside
module.exports = {
/**
* Converts a html code to the object
* @param {String} html
* @returns {Object}
*/
getMicrodata: function (html) {
var $ = cheerio.load(html, {ignoreWhitespace: true}),
scopeSelector = '[itemscope][itemtype]:not([itemscope] [itemscope])',
propSelector = '[itemprop]:not([itemscope] [itemprop]):not([itemscope])',
self = this, result = {};
$(scopeSelector).each(function () {
var itemType = this.attr('itemtype'), props = {};
$(propSelector, $(this).html()).each(function () {
var propName = this.attr('itemprop'), value;
switch (this[0].name) {
case 'audio':
case 'embed':
case 'iframe':
case 'img':
case 'source':
case 'track':
case 'video':
value = this.attr('src');
break;
case 'a':
case 'area':
case 'link':
value = this.attr('href');
break;
case 'object':
value = this.attr('data');
break;
case 'meta':
value = this.attr('content');
break;
default:
value = this.text().trim();
break;
}
if (!_.has(props, propName)) {
props[propName] = value;
return;
}
if (_.isArray(props[propName])) {
props[propName].push(value);
return;
}
props[propName] = [props[propName], value];
});
var tmp = {}, keyName = itemType.replace('http://schema.org/', '');
tmp[keyName] = _.extend(props, self.getMicrodata($(this).html()));
_.merge(result, tmp);
});
return result;
}
};
@kkamkou
Copy link
Author

kkamkou commented Jan 19, 2014

Requires

lodash and the latest version of cheerio

Code

console.log(myVar.getMicrodata('html goes here'));

Example

Example

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment