Skip to content

Instantly share code, notes, and snippets.

@petamoriken
Created October 28, 2015 13:58
Show Gist options
  • Save petamoriken/f7c1158323331cc4f5d2 to your computer and use it in GitHub Desktop.
Save petamoriken/f7c1158323331cc4f5d2 to your computer and use it in GitHub Desktop.
PhantomJS の page.evaluate 内では非同期処理ができないが、XMLHttpRequest ($.ajax) を同期的に使うことでなんとかなった
var phantom = require('node-phantom-async');
var co = require('co');
/**
 * Serializes an object's own enumerable properties into a URL query string.
 *
 * Each key and each value is passed through encodeURIComponent and the
 * resulting `key=value` pairs are joined with `&`. Values are coerced to
 * strings by encodeURIComponent; nested objects are not expanded.
 *
 * @param {Object} object - Flat map of parameter names to values.
 * @returns {string} The query string without a leading `?` (empty string
 *   when the object has no own enumerable keys).
 */
var buildQuery = function (object) {
  var names = Object.keys(object);
  var pairs = [];
  for (var i = 0; i < names.length; i += 1) {
    var name = names[i];
    var encodedName = encodeURIComponent(name);
    var encodedValue = encodeURIComponent(object[name]);
    pairs.push(encodedName + '=' + encodedValue);
  }
  return pairs.join('&');
};
/**
 * Searches CiNii full-text for the given keywords and downloads the `.bib`
 * document linked from every result on the first results page.
 *
 * A PhantomJS instance is started for the call and is always shut down
 * before the returned promise settles, whether the scrape succeeds or fails.
 *
 * @param {string|string[]} keywords - A search string, or a list of terms
 *   that are joined with single spaces.
 * @returns {Promise<Array>} Resolves with the response bodies of the `.bib`
 *   requests that succeeded, in result order. Failed requests are skipped.
 */
var scrapeFromCiNii = function (keywords) {
  return co(function *() {
    var params = {
      q: (typeof keywords === 'string') ? keywords : keywords.join(' '),
      // Sent as the `count` query parameter; presumably the page size
      // requested from CiNii -- TODO confirm against the CiNii search API.
      count: 200
    };

    var ph = yield phantom.create();
    try {
      var page = yield ph.createPage();
      yield page.open('http://ci.nii.ac.jp/fulltext?' + buildQuery(params));
      yield page.includeJs('https://code.jquery.com/jquery-2.1.4.min.js');

      // NOTE: this callback is serialized and executed inside the PhantomJS
      // page, not in Node. It cannot see outer variables and should stay
      // plain ES5. page.evaluate cannot wait for asynchronous work, so the
      // requests below are deliberately synchronous (async: false).
      return yield page.evaluate(function () {
        var items = [];
        $('.item_title>a').each(function () {
          items.push($(this).attr("href") + ".bib");
        });

        var bibs = [];
        items.forEach(function (url) {
          $.ajax({
            async: false,
            url: url,
            type: "get"
          }).done(function (data) {
            // Only successful responses are collected; failures are skipped.
            bibs.push(data);
          });
        });
        return bibs;
      });
    } finally {
      // Always terminate the PhantomJS child process so it is not leaked
      // when navigation, script injection or evaluation throws.
      yield ph.exit();
    }
  });
};
// Script entry point: scrape the results for the keyword 'mptcp', print
// them, and exit. Exits with status 0 on success and 1 on failure.
scrapeFromCiNii('mptcp').then(function (bibs) {
  console.log('done');
  console.log(bibs);
  process.exit(0);
}).catch(function (err) {
  // Without this handler a failed scrape would be an unhandled rejection:
  // nothing would be reported and the script would never exit non-zero.
  console.error(err);
  process.exit(1);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment