Skip to content

Instantly share code, notes, and snippets.

@i09158knct
Last active December 11, 2015 04:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save i09158knct/4544997 to your computer and use it in GitHub Desktop.
Save i09158knct/4544997 to your computer and use it in GitHub Desktop.
Using PhantomJS
/**
 * Build the google.co.jp search URL for a search phrase.
 *
 * @param {string} word - Search phrase; percent-encoded as a single query value.
 * @param {number} [num] - Requested number of results. Any falsy value
 *     (missing, 0, NaN) falls back to 50.
 * @returns {string} The complete search URL.
 */
function generateGoogleSearchUrl(word, num) {
  // encodeURIComponent (unlike encodeURI) also escapes & = + # and ?, so a
  // phrase such as "a&b" cannot cut the q parameter short or inject new ones.
  var encodedWord = encodeURIComponent(word);
  var number = '&num=' + (num || 50);
  var prefix = 'https://www.google.co.jp/search?q=';
  var suffix = '&ie=UTF-8';
  return prefix + encodedWord + number + suffix;
}
function getWordList(fileName) {
var fs = require('fs');
var file = fs.open(fileName, 'r');
var wordList = file.read().split('\n');
return wordList;
}
function writeResult(word, result) {
var fs = require('fs');
fs.makeDirectory('outputs');
var file = fs.open('outputs/' + word, 'w');
file.write(result);
file.write('\n');
file.flush();
}
/**
 * Collect result URLs from the current document.
 *
 * This function is passed to page.evaluate, so it must stay self-contained:
 * it may only use what exists inside the page (e.g. `document`), not helpers
 * or variables from the surrounding script.
 *
 * @returns {string} JSON-encoded array with the href of the first child of
 *     every element of class "r", excluding URLs that end in ".pdf".
 */
function evalGetUrlList() {
  var links = [].slice.call(document.getElementsByClassName('r'));
  var urls = [];
  links.forEach(function(link) {
    var anchor = link.children[0];
    var url = anchor && anchor.href;
    // Skip entries without a usable link instead of throwing on them.
    if (typeof url !== 'string') {
      return;
    }
    // The dot is escaped so only a real ".pdf" extension is excluded (the
    // previous /.pdf$/ also dropped URLs such as ".../helppdf"); the match is
    // case-insensitive so ".PDF" is excluded as well.
    if (/\.pdf$/i.test(url)) {
      return;
    }
    urls.push(url);
  });
  return JSON.stringify(urls);
}
/**
 * Batch entry point: search every word listed in a file and store the result
 * URLs of each word in `outputs/<word>`.
 *
 * Command-line arguments:
 *   1. path of the word-list file (required)
 *   2. number of results to request per word (optional)
 */
;(function _main() {
  // phantom.args is missing on newer PhantomJS releases; there the arguments
  // come from the system module, whose first entry is the script name.
  var args = phantom.args || require('system').args.slice(1);
  var fileName = args[0];
  var urlNumber = +args[1];
  if (typeof fileName === 'undefined') {
    console.log('Error: no input file');
    phantom.exit(1);
    return;
  }

  var wordList = getWordList(fileName);
  var page = require('webpage').create();
  page.settings.userAgent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25';
  mainLoop(0);

  // Process wordList[current], then schedule the next word one second later.
  function mainLoop(current) {
    // Skip blank lines (including the one produced by a trailing newline)
    // rather than letting the first blank line end the whole batch.
    while (current < wordList.length && !wordList[current]) {
      current += 1;
    }
    if (current >= wordList.length) {
      phantom.exit();
      return;
    }

    var word = wordList[current];
    var searchUrl = generateGoogleSearchUrl(word, urlNumber);
    page.open(searchUrl, function(status) {
      try {
        if (status !== 'success') {
          console.log('Error: failed to load ' + searchUrl);
        } else {
          var resultJSON = page.evaluate(evalGetUrlList);
          // Guard against a non-string result (e.g. the in-page function
          // failed) before parsing it.
          if (typeof resultJSON === 'string') {
            writeResult(word, JSON.parse(resultJSON).join('\n'));
          } else {
            console.log('Error: could not extract links for ' + word);
          }
        }
      } catch (e) {
        // An exception escaping this callback would stop the loop without
        // ever reaching phantom.exit, leaving the process hanging.
        console.log('Error: ' + e);
      }
      // Always move on, with a 1 s pause between requests.
      setTimeout(function() { mainLoop(current + 1); }, 1000);
    });
  }
})();
/**
 * Build the google.co.jp search URL for a search phrase.
 *
 * @param {string} word - Search phrase; percent-encoded as a single query value.
 * @param {number} [num] - Requested number of results. Any falsy value
 *     (missing, 0, NaN) falls back to 50.
 * @returns {string} The complete search URL.
 */
function generateGoogleSearchUrl(word, num) {
  // encodeURIComponent (unlike encodeURI) also escapes & = + # and ?, so a
  // phrase such as "a&b" cannot cut the q parameter short or inject new ones.
  var encodedWord = encodeURIComponent(word);
  var number = "&num=" + (num || 50);
  var prefix = "https://www.google.co.jp/search?q=";
  var suffix = "&ie=UTF-8";
  return prefix + encodedWord + number + suffix;
}
/**
 * Single-word entry point: search one word and print the result URLs, one
 * per line.
 *
 * Command-line arguments:
 *   1. search word (required)
 *   2. number of results to request (optional)
 *   3. delay in milliseconds before the request is sent (optional, default 0)
 *
 * Exits with status 1 when the word is missing, the page fails to load, or
 * no link list could be extracted.
 */
;(function _main() {
  // phantom.args is missing on newer PhantomJS releases; there the arguments
  // come from the system module, whose first entry is the script name.
  var args = phantom.args || require("system").args.slice(1);
  var word = args[0];
  var urlNumber = +args[1];
  var delay = +args[2] || 0;
  if (typeof word === "undefined") {
    console.log("Error: no search word");
    phantom.exit(1);
    return;
  }

  var page = require("webpage").create();
  // Alternative desktop user agent, kept for reference:
  // page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7";
  page.settings.userAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A403 Safari/8536.25";
  var searchUrl = generateGoogleSearchUrl(word, urlNumber);
  setTimeout(main, delay);

  // Load the search page, print the collected links and terminate.
  function main() {
    page.open(searchUrl, function(status) {
      // Every path below must end in phantom.exit; otherwise the process
      // keeps running after a failure.
      if (status !== "success") {
        console.log("Error: failed to load " + searchUrl);
        phantom.exit(1);
        return;
      }

      // Runs inside the page: gathers the href of every element of class "l".
      var resultJSON = page.evaluate(function() {
        var urlList = [];
        var links = window.document.getElementsByClassName("l");
        for (var i = 0, linksLength = links.length; i < linksLength; i++) {
          urlList.push(links[i].href);
        }
        return JSON.stringify(urlList);
      });

      // Guard against a non-string result (e.g. the in-page function failed)
      // before parsing it.
      if (typeof resultJSON !== "string") {
        console.log("Error: could not extract links from the result page");
        phantom.exit(1);
        return;
      }

      console.log(JSON.parse(resultJSON).join("\n"));
      phantom.exit();
    });
  }
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment