Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extracts microdata from HTML
// Dependencies
var _ = require('lodash'),
cheerio = require('cheerio');
// Public API exported by this module
module.exports = {
/**
* Converts a html code to the object
* @param {String} html
* @returns {Object}
*/
getMicrodata: function (html) {
var $ = cheerio.load(html, {ignoreWhitespace: true}),
scopeSelector = '[itemscope][itemtype]:not([itemscope] [itemscope])',
propSelector = '[itemprop]:not([itemscope] [itemprop]):not([itemscope])',
self = this, result = {};
$(scopeSelector).each(function () {
var itemType = this.attr('itemtype'), props = {};
$(propSelector, $(this).html()).each(function () {
var propName = this.attr('itemprop'), value;
switch (this[0].name) {
case 'audio':
case 'embed':
case 'iframe':
case 'img':
case 'source':
case 'track':
case 'video':
value = this.attr('src');
break;
case 'a':
case 'area':
case 'link':
value = this.attr('href');
break;
case 'object':
value = this.attr('data');
break;
case 'meta':
value = this.attr('content');
break;
default:
value = this.text().trim();
break;
}
if (!_.has(props, propName)) {
props[propName] = value;
return;
}
if (_.isArray(props[propName])) {
props[propName].push(value);
return;
}
props[propName] = [props[propName], value];
});
var tmp = {}, keyName = itemType.replace('http://schema.org/', '');
tmp[keyName] = _.extend(props, self.getMicrodata($(this).html()));
_.merge(result, tmp);
});
return result;
}
};
@kkamkou

This comment has been minimized.

Copy link
Owner Author

kkamkou commented Jan 19, 2014

Requires

lodash and the latest version of cheerio

Code

console.log(myVar.getMicrodata('html goes here'));

Example

Example

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.