Skip to content

Instantly share code, notes, and snippets.

@subelsky
Created August 8, 2012 18:51
Show Gist options
  • Save subelsky/3297506 to your computer and use it in GitHub Desktop.
Save subelsky/3297506 to your computer and use it in GitHub Desktop.
Webscraping with CasperJS, PhantomJS, jQuery, and XPath
// Command-line handling and CasperJS bootstrap for running under plain PhantomJS.
// Expected invocation (system.args[0] is the script itself, hence the "< 5" check):
//   phantomjs <script> <account> <username> <password> <path-to-casperjs>
var system = require('system');
if (system.args.length < 5) {
    console.info("You need to pass in account name, username, password, and path to casperJS as arguments to this code.");
    // Exit with a non-zero status so callers (shell scripts, schedulers) can
    // distinguish a usage error from a successful scrape.
    phantom.exit(1);
}

// Positional arguments, in the order named by the usage message above.
var account = system.args[1];
var username = system.args[2];
var password = system.args[3];

// Root URL for this account; later steps append paths such as "/login" to it.
var base_uri = "https://example.com/" + account;

// Point PhantomJS at the CasperJS install and load its bootstrap so that
// require('casper') / require('utils') below can be resolved.
phantom.casperPath = system.args[4];
phantom.injectJs(phantom.casperPath + '/bin/bootstrap.js');

var utils = require('utils');

// Verbose debug logging makes every navigation step visible on the console.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug'
});
/**
 * Renders a backtrace as text, one line per frame.
 *
 * Frames are expected to be objects with `file`, `line` and optionally
 * `function` properties (the shape PhantomJS passes to onError handlers).
 * Echoing such an array directly would stringify each frame as
 * "[object Object]", so the frames are formatted explicitly. Anything that
 * is not an array is returned via String() unchanged in meaning.
 *
 * @param {*} backtrace - the trace handed to the event handler.
 * @returns {string} printable representation of the trace.
 */
function formatBacktrace(backtrace) {
    if (!Array.isArray(backtrace)) {
        return String(backtrace);
    }
    return backtrace.map(function (frame) {
        if (frame === null || typeof frame !== "object") {
            return "  at " + String(frame);
        }
        var where = frame.file + ":" + frame.line;
        // "function" is a reserved word, so use bracket access for old engines.
        var fnName = frame["function"];
        return "  at " + (fnName ? fnName + " (" + where + ")" : where);
    }).join("\n");
}

/**
 * Builds an event handler that prints a banner, the given label, the error
 * message and the formatted backtrace.
 *
 * The returned function relies on `this` supplying `echo`, exactly as the
 * handlers registered with casper.on() do.
 *
 * @param {string} label - heading line identifying the kind of failure.
 * @returns {function(string, *)} handler taking (msg, backtrace).
 */
function makeFailureReporter(label) {
    return function (msg, backtrace) {
        this.echo("=========================");
        this.echo(label);
        this.echo(msg);
        this.echo(formatBacktrace(backtrace));
        this.echo("=========================");
    };
}

// Errors raised in the CasperJS/PhantomJS script itself.
casper.on('error', makeFailureReporter("ERROR:"));
// JavaScript errors raised inside the loaded web page.
casper.on("page.error", makeFailureReporter("PAGE.ERROR:"));
// First step: load the account's login page, fill in the login form with the
// credentials given on the command line, and submit it.
casper.start(base_uri + "/login", function submitLogin() {
    var credentials = {
        username: username,
        password: password
    };
    var submitAfterFill = true; // third argument to fill(): submit the form
    this.fill("form[name='login_form']", credentials, submitAfterFill);
});
// can't click the reports button as that causes a weird file:// link problem,
// so navigate to the reports page directly instead.
// base_uri has no trailing slash (compare base_uri + "/login" in the login
// step), so the separator must be part of the path appended here.
casper.thenOpen(base_uri + "/reports");
// Locate the account report link on the reports page, turn it into a directly
// loadable URL, and open that URL in the current page.
casper.then(function openAccountReport() {
    // winOpenTransform is a function provided by the page; it's brittle for us to invoke it
    // this way instead of clicking a button, but when you click the button, it pops up a new
    // window, and PhantomJS doesn't currently support popup windows. Thus I have to call
    // this function directly to avoid a popup.
    //
    // Because winOpenTransform lives in the page, it can only be called from
    // inside evaluate(); it is not defined in this script's own context.
    var reportUrl = this.evaluate(function () {
        var link = __utils__.getElementByXPath("//a[contains(@href,'Account') and contains(@href,'Report')]");
        if (!link) {
            return null;
        }
        // The href embeds the real URL followed by a single quote; capture
        // everything from "http:" up to (not including) that quote.
        var embedded = link.href.match(/http:.+?(?=')/);
        if (!embedded) {
            return null;
        }
        // NOTE(review): assumes winOpenTransform returns the final URL string — confirm.
        return winOpenTransform(embedded[0]);
    });

    if (!reportUrl) {
        // Fail loudly with a clear message instead of a TypeError further down.
        this.die("Could not resolve the account report URL from the reports page.", 1);
        return;
    }
    this.open(reportUrl);
});
// Inject a local jquery.min.js into the currently loaded page. The original
// author's stated purpose: so an option can be picked with the select item
// in the step that follows.
casper.then(function injectJquery() {
    var jqueryFile = 'jquery.min.js';
    this.page.injectJs(jqueryFile);
});
// Runs inside the page: set the report form's filter fields, direct the
// result at the "report" target, and submit the form.
casper.thenEvaluate(function () {
    var reportForm = document.report;
    reportForm.runtimeCondition.value = "ExampleField IS NOT NULL";
    reportForm.condition.value = "'','','examplename IS NOT NULL','21'";
    reportForm.target = "report";
    reportForm.submit();
});
// Open the account's report listing page.
casper.thenOpen("https://example.com/" + account + "/Report/reports");

// Find the link whose text is exactly "CSV" and save its target as data.csv.
casper.then(function downloadCsv() {
    var csvUrl = this.evaluate(function () {
        var csvLink = __utils__.getElementByXPath("//a[./text()='CSV']");
        return csvLink.href;
    });
    this.echo("GETTING " + csvUrl);
    this.download(csvUrl, "data.csv", "GET");
});

// Start executing the queued steps.
casper.run();
@ameelin
Copy link

ameelin commented Mar 17, 2013

Mike,
if I have casperjs in path would I need these two lines?
phantom.casperPath = system.args[4];

phantom.injectJs(phantom.casperPath + '/bin/bootstrap.js');

~Neal

@stphnclysmth
Copy link

Given the installation information in the docs, I think that you can drop those two lines if you're calling the script with the CasperJS executable. I can't test this script, but it works with my own fork.

@gregorynicholas
Copy link

i'm assuming that jquery.min.js sat inside of the same directory as this file?

@altryne
Copy link

altryne commented Aug 16, 2013

Hi Mike,
I want to understand something,
How exactly are you running this application on Heroku?
I have a casper script that works fine locally, I want to publish it to heroku so I can trigger it via a get/post from another service.
Is this possible?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment