Skip to content

Instantly share code, notes, and snippets.

@jpgninja
Created March 21, 2020 02:52
Show Gist options
  • Save jpgninja/dc50731af7c4a85f17136b2ca957608d to your computer and use it in GitHub Desktop.
Save jpgninja/dc50731af7c4a85f17136b2ca957608d to your computer and use it in GitHub Desktop.
const axios = require('axios');
const cheerio = require('cheerio');
/**
 * Announces that the Google scrape is being stopped.
 *
 * Only writes a banner to the console; none of the parameters are read.
 *
 * @param host Unused.
 * @param port Unused.
 * @param options Unused.
 * @param callback Unused; it is never invoked.
 */
function stop(host, port, options, callback) {
  const banner = '\n\n-- Stopping scrape (Google) --\n';
  console.log(banner);
}
/**
 * Announces that the Google scrape has finished.
 *
 * Takes no arguments and only writes a banner to the console.
 */
function scrapeCompleteHandler() {
  const banner = '\n\n-- Scrape complete (Google) --\n';
  console.log(banner);
}
/**
 * Fetches a Google results page for `term`, optionally through an HTTP proxy.
 *
 * " -site:www.tumblr.com" is appended to the term before it is URL-encoded.
 *
 * @param term Search phrase.
 * @param proxy Proxy as "host" or "host:port". A missing or non-numeric port
 *              falls back to 80. An empty, missing or non-string value means
 *              no proxy setting is passed to axios.
 * @returns Promise resolving to the axios response, or to `false` when the
 *          request fails (the failure is logged, never rethrown).
 */
var search = (term, proxy) => {
  const req_data = {};

  // Setup proxy. Anything that is not a string is treated as "no proxy"
  // rather than crashing on `.split`.
  const proxy_array = ( "string" === typeof proxy ) ? proxy.split(":") : [];
  const proxy_host = ( "undefined" !== typeof proxy_array[0] ) ? proxy_array[0] : "";
  const proxy_port = parseInt( proxy_array[1], 10 );
  if ( "" !== proxy_host ) {
    req_data.proxy = {
      host: proxy_host,
      // parseInt yields NaN for a missing or malformed port.
      port: Number.isNaN( proxy_port ) ? 80 : proxy_port,
    };
  }

  // Setup request.
  req_data.timeout = 5000;
  req_data.headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-us) AppleWebKit/534.1+ (KHTML, like Gecko) Version/5.0 Safari/533.16",
  };

  // Setup term (kept in a local so the caller's argument is not reassigned).
  const query = term + " -site:www.tumblr.com";

  // Setup search URL.
  const url = `http://www.google.com/search?q=${encodeURIComponent(query)}&ie=UTF-8` // Base URL.
    + "&num=100" // Return 100 results.
    + "&pws=0" // Personalized search on/off.
    + "&lr=lang_en"; // Language (for in case the proxy throws this off).

  // Only mention the proxy when one is configured; reading req_data.proxy.host
  // unconditionally would throw for an empty proxy string.
  if ( req_data.proxy ) {
    console.log( "Googling '%s' using proxy (%s:%d)", query, req_data.proxy.host, req_data.proxy.port );
  }
  else {
    console.log( "Googling '%s' without a proxy", query );
  }
  console.log( "URL: %s", url );

  return axios.get(url, req_data)
    .catch( (err) => {
      console.log("ACK! Google error.");

      // `response` only exists when the server actually answered, so it has
      // to be checked before its status is read.
      const response = err ? err.response : undefined;

      if ( response && "undefined" !== typeof response.status ) {
        console.log( response.status, response.statusText );
        console.log( "Message: '%s'", err.message );
        console.log( err );
      }
      // Checked before the generic axios branch: an error carrying this code
      // may also be flagged isAxiosError, which would otherwise shadow it.
      else if ( err && "ECONNABORTED" === err.code ) {
        console.log( "Message: '%s'", "Connection timed out." );
      }
      else if ( err && err.isAxiosError ) {
        const details = ( "function" === typeof err.toJSON ) ? err.toJSON() : err;
        console.log( "Message: '%s'", details.message );
      }
      else {
        console.log("Well, this is embarrassing... I have no output.");
        console.log( err );
      }

      return false;
    });
};
/*
 * Scrapes Google SERP.
 *
 * Parses the HTML in `res.data` and collects one { title, url, description }
 * entry per element matching ".rc .r" inside "div#search". The selectors are
 * hard-coded; whether they still match current Google markup is not checked
 * here.
 *
 * @param res Response object whose `data` property holds the page HTML. A
 *            missing response (including the `false` a failed search resolves
 *            to) is skipped.
 * @returns Array of result objects, or undefined when there is nothing to
 *          parse.
 */
let scrape_results = ( res ) => {
  // Ensure we received a response. The falsy check covers null/undefined,
  // which would otherwise throw on the property read.
  if ( !res || "undefined" === typeof res.data ) {
    return;
  }

  const $ = cheerio.load(res.data);

  // Query once and reuse the selection for both the count and the extraction.
  const $hits = $("div#search").find(".rc .r");
  console.log( "Found %d total results.", $hits.length );

  const results = $hits.toArray().map( (el) => {
    const $hit = $(el);
    return {
      title: $hit.find("h3").text(),
      url: $hit.find("a").attr("href"),
      description: $hit.find("div.s div span.st").text(),
    };
  });

  console.log( "\n\n\n---" );
  console.log( "\n\n\nAll results\n\n\n" );
  console.log( results );

  return results;
};
// Run: search for a fixed term through a fixed proxy, then scrape the response.
search('red lipstick', '196.54.30.47:80')
  .then( (response) => scrape_results( response ) )
  .catch( function onFetchError( err ) {
    // Reached when the search promise rejects or scrape_results throws.
    console.log('ERR! Fetch error.');
    console.log( err );
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment