Skip to content

Instantly share code, notes, and snippets.

@maggocnx
Last active August 29, 2015 14:21
Show Gist options
  • Save maggocnx/e4df0f3178613fe7f60e to your computer and use it in GitHub Desktop.
Save maggocnx/e4df0f3178613fe7f60e to your computer and use it in GitHub Desktop.
Scraping Amazon Orders German
// CasperJS script: signs in to Amazon.de, opens the order-history page and
// saves a screenshot of it as capture.png.
var casper = require('casper').create({ pageSettings: { webSecurityEnabled: false }});
casper.userAgent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');

// Counter updated by getPageOrders() and echoed when pagination ends.
var numberOfOrders = 0;

// Relay page-side console output, but only lines that start with ">"
// (parsePage() prefixes its own log lines that way).
casper.on('remote.message', function(msg) {
    if (msg.indexOf(">") === 0) {
        this.echo(msg);
    }
});

var loginUrl = "https://www.amazon.de/ap/signin/275-8205473-8265603?_encoding=UTF8&openid.assoc_handle=deflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.de%2Fgp%2Fcss%2Fhomepage.html%3Fie%3DUTF8%26ref_%3Dnav_yam_ya";

// SECURITY NOTE(review): credentials are committed in plain text below. They
// are kept only as fallbacks so existing invocations keep working; prefer
// passing --email=... and --password=... on the command line and remove the
// literals from the file.
// cli.raw is used so that an all-digit value is not cast to a number.
var loginEmail = casper.cli.has("email") ? casper.cli.raw.get("email") : "marco@gronic.de";
var loginPassword = casper.cli.has("password") ? casper.cli.raw.get("password") : "manila";

casper.start(loginUrl, function() {
    this.echo(this.getTitle());
    // The third argument (true) submits the form right after filling it.
    this.fill('form#ap_signin_form', {
        email : loginEmail,
        password : loginPassword
    }, true);
});

// casper.then(function(){
// casper.download("https://amazon.de/gp/css/order/static-display/get-legal-invoice.html/ref=oh_aui_legal_invoice?ie=UTF8&orderId=304-2959266-4413936", "./invoices/invoice.pdf")
// })

casper.then(function() {
    casper.open("https://www.amazon.de/gp/css/order-history/ref=nav_gno_yam_yrdrs");
});

casper.then(function() {
    casper.capture("capture.png");
});

// casper.then(getPageOrders);

casper.run(function() {
    this.echo('Done.').exit();
});
/**
 * Casper step: parses the orders on the page that is currently loaded, adds
 * their count to the file-level numberOfOrders counter, and then either
 * schedules itself for the next page (when the "next" pagination link is
 * visible) or echoes "END <total>".
 *
 * Relies on the file-level `casper` instance, `numberOfOrders` counter and
 * `parsePage` function.
 */
function getPageOrders() {
    // parsePage() runs inside the page and needs jQuery ($) and moment there.
    casper.page.injectJs("./jquery.min.js");
    casper.page.injectJs("./moment.min.js");

    var orders = casper.evaluate(parsePage);
    // Guard: evaluate() may hand back a non-array (e.g. null) when the
    // page-side function fails or returns nothing.
    if (orders && orders.length) {
        numberOfOrders += orders.length;
    }

    var nextLink = "li.a-last > a";
    if (casper.visible(nextLink)) {
        // Queue the click first, then this step again for the following page.
        casper.thenClick(nextLink);
        casper.then(getPageOrders);
    } else {
        casper.echo("END " + numberOfOrders);
    }
}
/**
 * Extracts the orders shown on the current order-history page.
 *
 * Meant to be passed to casper.evaluate(), i.e. it executes inside the web
 * page. It must therefore stay self-contained (helpers are nested, nothing
 * from the surrounding script is referenced) and it expects jQuery (`$`) and
 * `moment` to have been injected into the page beforehand.
 *
 * @returns {Array<Object>} one entry per ".a-box-group" element, each with
 *   `id`, `price` (number), `orderedBy`, `date` (epoch milliseconds) and
 *   `products` (array of product records). The result contains no circular
 *   references, so it can be serialized back to the calling script.
 */
function parsePage() {
    // Capture group 1 is the seller id taken from a "seller=..." URL parameter.
    var sellerRegex = /seller=([a-zA-Z0-9]*)/;
    var orders = [];

    moment.locale("de-DE");

    // Converts a price such as "EUR 1.234,56" into the number 1234.56.
    // Assumes German formatting ("." groups thousands, "," is the decimal
    // mark) — TODO confirm against the live page.
    function parsePrice(text) {
        var normalized = $.trim(text)
            .replace("EUR ", "")
            .replace(/\./g, "")
            .replace(",", ".");
        return parseFloat(normalized);
    }

    $.each($(".a-box-group"), function (orderIndex, orderBox) {
        var $orderBox = $(orderBox);
        // The ".value" cells are read positionally:
        // 0 = order date, 1 = order total, 2 = ordered by, 3 = order id.
        var orderDetails = $orderBox.find(".value");
        var productBoxes = $orderBox.find(".a-fixed-left-grid-inner");
        var orderDate = $.trim($(orderDetails[0]).text());

        var order = {
            id : $.trim($(orderDetails[3]).text()),
            price : parsePrice($(orderDetails[1]).text()),
            orderedBy : $.trim($(orderDetails[2]).text()),
            // format("X") yields Unix seconds; scaled to milliseconds.
            date : parseInt(moment(orderDate, "DD. MMMM YYYY").format("X"), 10) * 1000,
            products : []
        };
        console.log("> " + order.id);

        $.each(productBoxes, function (productIndex, productBox) {
            var $productBox = $(productBox);
            var imgUrl = $($productBox.find("img")[0]).attr("src");
            // Links are read positionally: 1 = product title, 2 = seller.
            var links = $productBox.find(".a-link-normal");
            var nameEl = $(links[1]);
            var sellerEl = $(links[2]);
            var sellerUrl = sellerEl.attr("href");
            var sellerMatch = sellerUrl ? sellerUrl.match(sellerRegex) : null;

            var product = {
                name : $.trim(nameEl.text()),
                imgUrl : imgUrl,
                url : nameEl.attr("href"),
                sellerName : $.trim(sellerEl.text()),
                sellerUrl : sellerUrl,
                orderId : order.id,
                notes : "",
                status : "open",
                // Per-item prices are not read from the page: a single-product
                // order inherits the order total, otherwise the placeholder
                // string "0.0" is used.
                price : productBoxes.length > 1 ? "0.0" : order.price
            };

            // sellerId is only set when the seller link carries a "seller="
            // parameter; products without one simply have no sellerId.
            if (sellerMatch) {
                product.sellerId = sellerMatch[1];
            }

            console.log("> " + product.name.substring(0,50) + "\t\t" + product.price);
            order.products.push(product);
        });

        orders.push(order);
    });

    return orders;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment