Skip to content

Instantly share code, notes, and snippets.

@withinboredom
Last active August 29, 2015 14:20
Show Gist options
  • Save withinboredom/8b5e67869486dd202e6b to your computer and use it in GitHub Desktop.
Save withinboredom/8b5e67869486dd202e6b to your computer and use it in GitHub Desktop.
a dumb js scraper using iframes
(function() {
// Link hrefs collected from the category listings; getAll walks this list by index.
var items = [];
// Scraped product records are pushed here by getAll (exposed globally on window).
window.inventory = [];
// Pages that failed to scrape are recorded here as { item, exception }.
window.errors = [];
/**
 * Trims every string element of this array and removes every element that
 * equals `deleteValue`, either as stored or after trimming. Works in place.
 *
 * Trimming happens before the comparison, so whitespace-only strings are
 * dropped by clean("") instead of surviving as empty strings. Non-string
 * elements are never trimmed (so they cannot throw) and are only removed when
 * strictly equal to `deleteValue`.
 *
 * @param {*} deleteValue Value whose occurrences are removed.
 * @returns {Array} This same array, to allow chaining.
 */
Array.prototype.clean = function(deleteValue) {
    // Walk backwards so a splice never shifts an element we have yet to visit.
    for (var i = this.length - 1; i >= 0; i--) {
        var raw = this[i];
        var value = typeof raw === "string" ? raw.trim() : raw;
        if (raw === deleteValue || value === deleteValue) {
            this.splice(i, 1);
        }
        else {
            this[i] = value;
        }
    }
    return this;
};
// Gather the first link href found under each element of every listed
// category class, in category order, into `items`.
var categories = ['subcat_964', 'subcat_73'];
categories.forEach(function (categoryClass) {
    var selector = "." + categoryClass;
    $(selector).each(function (position, element) {
        var href = $(element).find('a').attr('href');
        items.push(href);
    });
});
/**
 * Loads the page at items[index] in a temporary iframe, scrapes one product
 * record out of it, then schedules the next item after a short random delay.
 *
 * A successfully scraped record is pushed onto window.inventory. If anything
 * throws while scraping, { item, exception } is pushed onto the global
 * `errors` array instead and the run continues with the next item.
 *
 * @param {number} [index=0] Position in `items` of the page to scrape.
 */
var getAll = function(index) {
    if (index == null) { index = 0; }
    // Nothing (left) to scrape: avoids loading an iframe whose src is "undefined".
    if (index >= items.length) { return; }

    // Set src through attr() instead of splicing it into the markup, so a quote
    // character in the URL cannot break the generated HTML.
    var frame = $("<iframe class='crapper'></iframe>").attr('src', String(items[index]));
    frame
        .appendTo('body')
        .load(function() {
            try {
                // Read from this iframe's own window; frames[0] is only the scraper
                // frame when the host page has no other iframes.
                var page = frame[0].contentWindow;
                // Arrays produced inside the frame use the frame's own Array, so the
                // frame needs its own copy of clean().
                page.Array.prototype.clean = Array.prototype.clean;
                var obj = {};
                obj.name = page.$('.detailtitle').text();
                obj.science = page.$('span i').text();
                obj.description = page.$('span p').text().trim();
                var deets = page.$('.category-description');
                obj.sku = $(deets.get(0)).text().trim().split("#")[1];
                obj.wholesale = $(deets.get(1)).html().split("<br>")[1].split("$")[1].trim();
                obj.retail = $(deets.get(2)).html().split("<br>")[1].split("$")[1].trim();
                obj.size = $(deets.get(4)).text().trim().split(" ");
                // The "Benefits" tab link's href is used as the selector of the tab panel.
                var benefitsPanel = page.$("#tabs li :contains(Benefits)").attr("href");
                obj.benefits = page.$(benefitsPanel)
                    .html()
                    .replace(/&nbsp;/g, "")
                    .replace(/\n/g, "")
                    .replace(/\t/g, "")
                    .replace(/•/g, "")
                    .trim()
                    .split("<br>").clean("");
                // The second link inside the "Resources" panel is taken as the picture URL.
                var resourcesPanel = page.$("#tabs li :contains(Resources)").attr("href");
                var resources = page.$(resourcesPanel + " a")[1];
                obj.picture = typeof resources === "undefined" ? "none" :
                    $(resources).attr("href");
                window.inventory.push(obj);
            }
            catch (ex) {
                try {
                    errors.push({
                        item: items[index],
                        exception: ex
                    });
                }
                catch (recordingError) {
                    console.log("failed to retrieve item for inventory");
                }
            }
            finally {
                // Remove only the iframe created here; other iframes on the host
                // page are left alone.
                frame.remove();
            }
            // Math.random() ignores arguments and returns a value in [0, 1), so it
            // must be scaled to get a delay between 100 and 1100 ms.
            var wait = 100 + Math.random() * 1000;
            setTimeout(function() {
                if (index + 1 < items.length) {
                    getAll(index + 1);
                }
            }, wait);
        });
};
getAll();
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment