Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atorralb/1d1d5e9333427fa6de0675a2a5c8f371 to your computer and use it in GitHub Desktop.
Save atorralb/1d1d5e9333427fa6de0675a2a5c8f371 to your computer and use it in GitHub Desktop.
iterate through a pagination site with nightmarejs
var Nightmare = require('nightmare');
var vo = require('vo');
// Hand the `run` generator to vo and invoke the resulting function with a
// completion callback; a reported error is rethrown so it is not silently lost.
vo(run)(function onFinished(failure, outcome) {
  if (failure) {
    throw failure;
  }
});
/**
 * Searches Yahoo for "github nightmare" and walks the result pages, collecting
 * the href of the first result link found on each page.
 *
 * Written as a generator meant to be driven by a coroutine runner (this file
 * passes it to `vo`): each `yield` hands a Nightmare action chain to the
 * runner and resumes with that chain's result.
 *
 * Paging stops as soon as no visible `.next` element is reported, or after
 * MAX_PAGE pages have been visited, whichever comes first.
 *
 * @returns {string[]} the collected hrefs (pages without a result link are skipped)
 */
function* run() {
  const MAX_PAGE = 10;
  const nightmare = Nightmare();
  const links = [];

  try {
    yield nightmare
      .goto('https://www.yahoo.com')
      .type('.input-query', 'github nightmare')
      .click('#search-submit')
      .wait('body');

    let currentPage = 0;
    let nextExists = yield nightmare.visible('.next');

    while (nextExists && currentPage < MAX_PAGE) {
      // This callback is handed to `evaluate`, so it only uses browser globals
      // and guards against a page that has no result links at all.
      const href = yield nightmare.evaluate(function () {
        var firstLink = document.querySelector('ol.searchCenterMiddle a');
        return firstLink ? firstLink.href : null;
      });
      if (href != null) {
        links.push(href);
      }

      yield nightmare
        .click('.next')
        .wait('body');

      currentPage++;
      nextExists = yield nightmare.visible('.next');
    }

    console.dir(links);
  } finally {
    // Always shut the browser down, even when a step above failed; otherwise
    // the Nightmare instance is never ended. The call has to be yielded so the
    // runner actually executes it.
    yield nightmare.end();
  }

  return links;
}
@adi518
Copy link

adi518 commented Jul 2, 2017

Nicely done. I'm new to generators. Why do you need the last yield statement? I noticed the code breaks without it, but can't understand why.

@davidimprovz
Copy link

davidimprovz commented Jul 10, 2017

This code does not work. On my Ubuntu 16.04 box with the latest stable version of node, I got a series of errors, starting with:

/home/dev/node_modules/vo/lib/wrap.js:30
      return co(fn).apply(ctx, args.concat(next))
                    ^

TypeError: co(...).apply is not a function
    at wrap (/home/dev/node_modules/vo/lib/wrap.js:30:21)
    at func (/home/dev/node_modules/vo/lib/compile.js:44:23)
    at next (/home/dev/node_modules/vo/lib/pipeline.js:34:15)
    at Pipeline (/home/dev/node_modules/vo/lib/pipeline.js:47:8)
    at /home/dev/node_modules/vo/index.js:44:5
    at vo (/home/dev/node_modules/vo/index.js:97:7)
    at Object.<anonymous> (/home/dev/AUTOSIFT/ez_equity_daemon/scraping/nightmare/nightmare_tests.js:61:8)
    at Module._compile (module.js:569:30)
    at Object.Module._extensions..js (module.js:580:10)
    at Module.load (module.js:503:32)

Note that I am not using the co library. I have explicitly required vo. But for some reason I am seeing co listed in the error.

Just for grins, I npm uninstalled co and vo from my packages. Then reinstalled them. After that, the code would execute, except that I got another error, which seems to reference the first if (err) throw err; as follows:

nightmare_tests.js:62
    if (err) { throw err; }
               ^
TypeError: You may only yield a function, promise, generator, array, or object, but the following was passed: "undefined"
    at next (/home/dev/node_modules/vo/node_modules/co/index.js:106:12)
    at /home/dev/node_modules/vo/node_modules/co/index.js:93:18
    at /home/dev/node_modules/vo/node_modules/co/index.js:224:7
    at <anonymous>

After removing the err statement, I ran the code and it simply hung.

@dshaw002
Copy link

The example is really outdated and was taken from a GitHub error a year ago. I made some adjustments and it works now:

var Nightmare = require('nightmare');
var vo = require('vo');
// Wrap the `run` generator with vo and start it; the completion callback
// rethrows whatever error is reported so a failure is not swallowed.
vo(run)(function onComplete(problem, value) {
  if (problem) {
    throw problem;
  }
});

/**
 * Searches Yahoo for "github nightmare" and walks the result pages, collecting
 * the href of the first result link found on each page.
 *
 * Written as a generator meant to be driven by a coroutine runner (this file
 * passes it to `vo`): each `yield` hands a Nightmare action chain to the
 * runner and resumes with that chain's result.
 *
 * Paging stops as soon as no visible `.next` element is reported, or after
 * MAX_PAGE pages have been visited, whichever comes first.
 *
 * @returns {string[]} the collected hrefs (pages without a result link are skipped)
 */
function* run() {
  const MAX_PAGE = 10;
  const nightmare = Nightmare();
  const links = [];

  try {
    // Wait for the result list itself rather than just `body` before paging.
    yield nightmare
      .goto('https://www.yahoo.com')
      .type('#uh-search-box', 'github nightmare')
      .click('#uh-search-button')
      .wait('ol.searchCenterMiddle');

    let currentPage = 0;
    let nextExists = yield nightmare.visible('.next');

    while (nextExists && currentPage < MAX_PAGE) {
      // This callback is handed to `evaluate`, so it only uses browser globals
      // and guards against a page that has no result links at all.
      const href = yield nightmare.evaluate(function () {
        var firstLink = document.querySelector('ol.searchCenterMiddle a');
        if (!firstLink) {
          return null;
        }
        console.log(firstLink.href);
        return firstLink.href;
      });
      if (href != null) {
        links.push(href);
      }

      yield nightmare
        .click('.next')
        .wait('body');

      currentPage++;
      nextExists = yield nightmare.visible('.next');
    }

    console.dir(links);
  } finally {
    // Always shut the browser down, even when a step above failed; otherwise
    // the Nightmare instance is never ended. The call has to be yielded so the
    // runner actually executes it.
    yield nightmare.end();
  }

  return links;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment