Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save talcual/61cebaed1933eb5524c2f66453dfff48 to your computer and use it in GitHub Desktop.
Save talcual/61cebaed1933eb5524c2f66453dfff48 to your computer and use it in GitHub Desktop.
Scrape Amazon Seller Central orders using CasperJS and PhantomJS
// Whitespace helpers installed on String.prototype.

// Strip whitespace from both ends of the string.
String.prototype.trim = function () {
    var bothEnds = /^\s+|\s+$/g;
    return this.replace(bothEnds, '');
};

// Strip whitespace from the start of the string only.
String.prototype.ltrim = function () {
    var leading = /^\s+/;
    return this.replace(leading, '');
};

// Strip whitespace from the end of the string only.
String.prototype.rtrim = function () {
    var trailing = /\s+$/;
    return this.replace(trailing, '');
};

// Drop whitespace hugging the string ends and line breaks, then collapse
// every remaining whitespace run into a single space.
String.prototype.fulltrim = function () {
    var edgeWhitespace = /(?:(?:^|\n)\s+|\s+(?:$|\n))/g;
    var withoutEdges = this.replace(edgeWhitespace, '');
    return withoutEdges.replace(/\s+/g, ' ');
};
// Format the date as "MM/DD/YYYY" using its UTC components.
Date.prototype.MMDDYYYY = function () {
    // Left-pad a one-character number with "0" so it is two characters wide.
    function twoDigits(value) {
        var text = value.toString();
        return text[1] ? text : "0" + text[0];
    }

    var parts = [
        twoDigits(this.getUTCMonth() + 1), // getUTCMonth() is zero-based
        twoDigits(this.getUTCDate()),
        this.getUTCFullYear().toString()
    ];
    return parts.join("/");
};
// The single casper instance that drives the whole scrape.
var casper = require('casper').create({
    // Only messages at "error" level are logged; verbose prints them as they occur.
    logLevel: 'error',
    verbose: true,
    pageSettings: {
        // Images and plugins are not loaded.
        loadImages: false,
        loadPlugins: false,
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
    }
});
// Command-line options. Each variable gets its own declaration: the former
// chained form (`var a = b = c = ""`) only declared the first name and leaked
// every other one as an implicit global.
var cli_username = "";
var cli_password = "";
var cli_filename = "";
var cli_filepath = "";
var cli_fromdate = "";
var cli_todate = "";

// Every option below must be present, otherwise the usage text is printed.
var requiredCliOptions = ["email", "password", "filename", "filepath", "fromdate", "todate"];
var hasAllCliOptions = requiredCliOptions.every(function (optionName) {
    return casper.cli.has(optionName);
});

if (!hasAllCliOptions) {
    casper.echo("");
    casper.echo("");
    casper.echo("Something is missing");
    casper.echo("How to use this script");
    casper.echo("Example");
    casper.echo("");
    casper.echo("casperjs.exe amazon_pull_orders.js --email=myemail@gmail.com --password=mypassword --filename=\"orders_of_july.csv\" --filepath=\"D:\\downloaded_csv\" --fromdate=23/11/2011 --todate=25/11/2011");
    casper.echo("");
    casper.echo("");
    casper.echo("fromdate and todate should be in DD/MM/YYYY format");
    casper.echo("");
    casper.echo("");
    // casper.exit() does not stop the script immediately, so the values are
    // only read in the else branch; on this path they stay empty strings
    // instead of becoming undefined.
    casper.exit(1);
} else {
    // casper.cli.get() type-casts its result (e.g. a numeric password or file
    // name becomes a Number); the raw accessor keeps the text exactly as typed.
    cli_username = String(casper.cli.raw.get("email"));
    cli_password = String(casper.cli.raw.get("password"));
    cli_filename = String(casper.cli.raw.get("filename"));
    cli_filepath = String(casper.cli.raw.get("filepath"));
    cli_fromdate = String(casper.cli.raw.get("fromdate"));
    cli_todate = String(casper.cli.raw.get("todate"));
}
var utils = require('utils');
var fs = require('fs');
var x = require('casper').selectXPath;
var screenshot = "tasveer.png";
var waittime = 8000; // in milliseconds

// Validate the output directory when one was given. These checks run at the
// top level of the script, where `this` is not the casper instance, so the
// messages and the exit must go through `casper` explicitly.
if (cli_filepath.length > 0 && !fs.isDirectory(cli_filepath)) {
    casper.echo(cli_filepath + " doesn't exists");
    casper.exit(1);
}
if (cli_filepath.length > 0 && !fs.isWritable(cli_filepath)) {
    casper.echo(cli_filepath + " is not writable");
    casper.exit(1);
}

// Full path of the CSV file; falls back to the bare file name when no
// directory was supplied.
var outputfilename = (cli_filepath.length > 0 ? cli_filepath + "/" : "") + cli_filename;
var listingpages = 1; // number of result pages, updated once the first listing is loaded
var orderlinks = []; // order detail URLs collected from every listing page
var header = "Order ID,Order Date,Ship Address,Ship Country,Buyer Username,Sales Channel,Items Total,Delivery total,Refunds Total,Grand Total,Order Line,SKU,Quantity Ordered,Quantity Shipped,Price Each,Shipping,Refund,Status";

// Start from a clean slate: drop leftovers of a previous run.
if (fs.exists(screenshot)) {
    fs.remove(screenshot);
}
if (fs.exists(outputfilename)) {
    fs.remove(outputfilename);
}
var stream = fs.open(outputfilename, "w");
/**
 * Turn a scraped value into a double-quoted CSV field.
 *
 * Double quotes and the currency symbols $, £ and € are removed, every run of
 * whitespace (line breaks included) collapses to one space, and whitespace at
 * both ends is dropped. null and undefined yield an empty quoted field.
 *
 * @param {*} str value to sanitize; non-strings are converted with String()
 * @returns {string} the cleaned text wrapped in double quotes
 */
function SanitizeString(str) {
    // Coerce first so numbers or other non-string values do not crash on a
    // missing string method.
    var text = str == null ? '' : String(str);
    var cleaned = text
        .replace(/["$£€]/g, '')
        .replace(/\s+/g, ' ')
        // After collapsing, at most one space can remain at either end.
        .replace(/^ | $/g, '');
    return '"' + cleaned + '"';
}
/**
 * Queue a step that opens one order detail page, scrapes it and writes one
 * CSV line per order item.
 *
 * Nothing is written when the link carries no `orderID=` parameter, or when
 * the page could not be scraped (evaluate returned nothing).
 *
 * @param {Object} c casper instance used to queue the navigation step
 * @param {string} link URL of the order detail page
 * @param {Object} stream open output stream; only writeLine() is used
 */
function PickOrderDetail(c, link, stream) {
    c.thenOpen(link, function () {
        var idMatch = link.match(/orderID=\d+-\d+-\d+/g);
        if (!idMatch) {
            return;
        }
        var invoice_number = idMatch[0].split("=")[1];
        this.echo("Getting detail of order " + invoice_number);

        // Runs inside the page, relying on the page's own jQuery ($).
        var thedata = this.evaluate(function () {
            var jsondata = {};
            jsondata.orderdatetext = $("td.data-display-field:contains('Order Date'):eq(1)").next().text();
            jsondata.buyername = $("span#_myo_buyerEmail_progressIndicator").prev().text();
            jsondata.buyeremail = $("span#_myo_buyerEmail_progressIndicator").prev().attr("href");
            var ship_address_pieces = $("td.data-display-field strong:contains('Shipping Address')").parent().html().replace("<strong>Shipping Address</strong><br>","").split("<br>");
            // The country is taken from the second-to-last <br>-separated piece.
            jsondata.shipcountry = ship_address_pieces[ship_address_pieces.length-2];
            jsondata.saleschannel = $("td.data-display-field:contains('Sales channel'):eq(1)").next().text();
            jsondata.shippingaddress = $("strong:contains('Shipping Address')").parent().html().replace("<strong>Shipping Address</strong><br>","").replace(/<br>/g,", ");
            jsondata.itemstotal = $("td.data-display-field[align=right]:contains('Items total:')").parent().find("td:nth-child(2)").text();
            jsondata.deliverytotal = $("td.data-display-field[align=right]:contains('Delivery total:')").parent().find("td:nth-child(2)").text();
            jsondata.grandtotal = $("td.data-display-field[align=right]:contains('Grand Total')").parent().find("td:nth-child(2)").text();
            jsondata.refundtotal = $("td.data-display-field[align=right]:contains('Refund(s) total:')").parent().find("td:nth-child(2)").text().replace(/\(|\)/g,"");
            var order_items_text = [];
            $.each($("table.data-display:eq(3) tbody:eq(0)").children("tr"), function () {
                var row = $(this);
                if (!row.hasClass("list-row-odd") && !row.hasClass("list-row-even")) {
                    return;
                }
                var tr_text = row.text().replace(/(\r\n|\n|\t)/g,"");
                // All per-row values are local now; they used to leak into the
                // page's global scope.
                var skuMatch = tr_text.match(/SKU: +\w+(?:-\w+)*/g);
                var sku = skuMatch ? skuMatch[0].split(":")[1] : null;
                order_items_text.push({
                    sku: sku,
                    quantity_ordered: row.children("td:eq(2)").text(),
                    quantity_shipped: row.children("td:eq(3)").text(),
                    price_each: row.children("td:eq(4)").text(),
                    ship_charged: row.children("td:eq(5)").find("table tbody tr:eq(1) td:eq(1)").text(),
                    order_status: row.children("td:eq(1)").text(),
                    refund: row.children("td:nth-child(6)").find("table tr:nth-child(3) td:nth-child(2)").text().replace(/\(|\)/g,"")
                });
            });
            jsondata.orderitems = order_items_text;
            return jsondata;
        });

        // Without this guard a failed scrape (e.g. an unexpected page layout)
        // would crash on the first property access below.
        if (!thedata) {
            this.echo("Unable to read detail of order " + invoice_number + ", skipping");
            return;
        }

        // The address block starts with the buyer name; strip it before output.
        var shipping_address = thedata.shippingaddress.replace(thedata.buyername + ", ", "");

        // Order-level columns, in the same order as the CSV header.
        var orderColumns = [
            SanitizeString(invoice_number),
            SanitizeString(thedata.orderdatetext),
            SanitizeString(shipping_address),
            SanitizeString(thedata.shipcountry),
            SanitizeString(thedata.buyername),
            SanitizeString(thedata.saleschannel),
            SanitizeString(thedata.itemstotal),
            SanitizeString(thedata.deliverytotal),
            SanitizeString(thedata.refundtotal),
            SanitizeString(thedata.grandtotal)
        ];

        var orderitems = thedata.orderitems || [];
        for (var dataitemindex = 0; dataitemindex < orderitems.length; dataitemindex++) {
            var item = orderitems[dataitemindex];
            var itemColumns = [
                dataitemindex + 1, // 1-based order line number, written unquoted
                SanitizeString(item.sku),
                SanitizeString(item.quantity_ordered),
                SanitizeString(item.quantity_shipped),
                SanitizeString(item.price_each),
                SanitizeString(item.ship_charged),
                SanitizeString(item.refund),
                SanitizeString(item.order_status)
            ];
            stream.writeLine(orderColumns.concat(itemColumns).join(","));
        }
    });
}
/**
 * Collect the order detail links shown on the currently loaded listing page
 * and append them to the shared `orderlinks` array.
 *
 * @param {Object} c casper instance positioned on an order listing page
 */
function PickOrdersFromListing(c) {
    var linkSelector = 'tr.order-row td:nth-child(4) a';
    // getElementsInfo() throws when nothing matches, so a listing page without
    // order rows must be treated as an empty result instead of aborting the run.
    var orderlinkhrefs = c.exists(linkSelector) ? c.getElementsInfo(linkSelector) : [];
    c.echo("Picked " + orderlinkhrefs.length + " orders");
    for (var cindex = 0; cindex < orderlinkhrefs.length; cindex++) {
        orderlinks.push(orderlinkhrefs[cindex].attributes.href);
    }
}
// Handler for 'remote.message' events. It is intentionally a no-op;
// uncomment the echo below to print each message while debugging.
casper.on('remote.message', function (msg) {
    // this.echo(msg);
});
// Step 1: open the Seller Central home page and submit the sign-in form.
casper.start('https://sellercentral.amazon.co.uk/gp/homepage.html', function () {
    if (!this.exists("input#username")) {
        this.echo("Unable to found amazon login page");
        this.exit(1);
        // exit() does not stop the script immediately; return so the form
        // below is not filled on a page that has no login inputs.
        return;
    }
    this.echo("Logging in to seller central");
    // The trailing `true` submits the form after filling it.
    this.fillSelectors('form[name=signinWidget]', {
        'input#username': cli_username,
        'input#password': cli_password
    }, true);
});
// Step 2: confirm the login worked by looking for the settings quick link.
casper.then(function () {
    var isLoggedIn = this.exists("li#sc-quicklink-settings");
    if (isLoggedIn) {
        return;
    }
    this.echo("Login failed. Invalid email or password");
    this.exit(1);
});
// Step 3: open the advanced order search, fill in the exact date range and
// start the search.
casper.thenOpen("https://sellercentral.amazon.co.uk/gp/orders-v2/search/ref=ag_myosearch_apsearch_myo", function () {
    this.click('input#_myoSO_SearchOption_exactDates');
    this.click('input#_myoSO_ShowPendingCheckBox');

    // Values handed to the page context; their order matches the parameters
    // of the function evaluated below.
    var dateRange = {
        cli_fromdate: cli_fromdate,
        cli_todate: cli_todate
    };
    this.evaluate(function (cli_fromdate, cli_todate) {
        var beginInput = document.querySelector('input#exactDateBegin');
        var endInput = document.querySelector('input#exactDateEnd');
        var sortSelect = document.querySelector('select#sortBy');
        beginInput.value = cli_fromdate;
        endInput.value = cli_todate;
        sortSelect.value = "OrderDateDescending";
    }, dateRange);

    this.click('button#_myoSO_SearchButton');
    this.echo("Filled advanced search form and initiated search");
});
// Step 4: stop when the search found nothing, otherwise reload the result
// list with 100 items per page.
casper.then(function () {
    var currentlink = this.getCurrentUrl();
    // A dmCode parameter in the result URL is treated as "no orders found".
    var order_not_found = currentlink.match(/dmCode=\w+/g);
    if (order_not_found) {
        this.click("li.sc-logout-quicklink a");
        this.echo("No orders found for this date range");
        this.exit(1);
        // exit() does not stop the script immediately; return so the listing
        // is not re-opened after logging out.
        return;
    }
    this.echo("Setting items per page to 100");
    var urlwith100items = currentlink + "&itemsPerPage=100";
    this.open(urlwith100items);
});
// Step 5: once the progress indicator is gone, read the number of result
// pages and collect the order links of the first page.
casper.then(function () {
    this.waitWhileVisible("tr#_myoQL_progressIndicator", function then() {
        // Text of the pager link that precedes the last one. evaluate() yields
        // nothing when the pager is absent.
        var pages = this.evaluate(function () {
            return $("a.myo_list_orders_link:last-child").prev()[0].text;
        });
        // Explicit radix, and only accept a positive number: anything else
        // (missing pager, non-numeric text) keeps the default of one page.
        var pageCount = parseInt(pages, 10);
        if (pageCount > 0) {
            listingpages = pageCount;
        }
        PickOrdersFromListing(this);
    });
});
// Step 6: follow the "Next" link once for every remaining listing page and
// collect the order links of each page.
casper.then(function () {
    var nextSelector = "a.myo_list_orders_link:last-child";
    this.repeat(listingpages - 1, function () {
        // getElementInfo() throws when the selector matches nothing, so skip
        // quietly if the pager link is not on the page.
        if (!this.exists(nextSelector)) {
            return;
        }
        var nextlink = this.getElementInfo(nextSelector);
        if (nextlink.text === "Next") {
            this.thenOpen(nextlink.attributes.href, function () {
                // Wait a fixed time after opening the page before scraping it.
                this.wait(waittime, function () {
                    PickOrdersFromListing(this);
                });
            });
        }
    });
});
// Step 7: write the CSV header and queue one detail step per collected order.
casper.then(function () {
    var orderCount = orderlinks.length;
    this.echo("Now going to download " + orderCount + " orders");
    stream.writeLine(header);
    orderlinks.forEach(function (orderLink) {
        PickOrderDetail(this, orderLink, stream);
    }, this);
});
// Step 8: log out of Seller Central.
casper.thenClick("li.sc-logout-quicklink a");

// Step 9: push buffered output to disk, then release the file. The flush must
// come first; it used to be called on an already closed stream.
casper.then(function () {
    stream.flush();
    stream.close();
});

// Run every queued step.
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment