Skip to content

Instantly share code, notes, and snippets.

@jthatch
Created December 23, 2016 11:26
Show Gist options
  • Save jthatch/0e1613518c05136997064bed8d98a5a1 to your computer and use it in GitHub Desktop.
Save jthatch/0e1613518c05136997064bed8d98a5a1 to your computer and use it in GitHub Desktop.
Scan known Google proxy IP subnets to determine whether those IPs are Google proxies. This was written to help detect traffic originating from the "Data Saver" feature of Google's mobile Chrome browser.
#!/usr/bin/env node
/*
* dnsresolve.js
* Will resolve ip ranges to determine if they're google proxies.
*
* TIPS for increasing speed:
* Run on multi-core system
* ulimit -n 40000
*
* Examples:
* ./dnsresolve.js 62.249.*.*
*
* @author jamest
* @date 20/12/2016
*/
'use strict';
const fs = require('fs');
const util = require('util');
const path = require('path');
const cluster = require('cluster');
const EventEmitter = require('events').EventEmitter;
const dns = require('dns');
const chalk = require('chalk');
/**
 * Reverse-DNS scanner that checks whether the addresses in one or more IP
 * ranges resolve to google-proxy-*.google.com host names.
 *
 * The range to scan is taken from the first command line argument; when none
 * is given, a built-in list of a.b.c.* ranges is scanned instead.
 * @constructor
 */
function Rdns() {
    // A range given on the command line wins over the built-in list.
    this.range = process.argv[2] || [
        '64.233.172.*',
        '64.233.173.*', // Provided by Akay
        '64.9.249.*',
        '66.102.6.*',
        '66.102.7.*',
        '66.102.8.*',
        '66.102.9.*',
        '66.249.80.*',
        '66.249.82.*',
        '66.249.83.*',
        '66.249.84.*',
        '66.249.85.*',
        '66.249.88.*',
        '66.249.93.*',
    ];

    // Deliberately NOT global (/g): a global regex remembers lastIndex between
    // exec()/test() calls, so re-using it on a different host name would start
    // matching in the middle of that string and miss valid proxies.
    this.re = /google\-proxy\-[0-9\-]+\.google\.com/i;

    // One worker per core, capped at 4 (more than that was found to be
    // overkill), and never fewer than one.
    this.workers = Math.max(1, Math.min(require('os').cpus().length, 4));
    this.concurrentResolves = this.workers * 4;

    // internal variables
    this._ips = [];             // IPs still waiting to be handed to a worker
    this._proxies = [];         // format [[ip, address], .. n]
    this._workersFinished = 0;
    this._resolved = 0;
    this._skipped = 0;
    this._startTime = Date.now();

    EventEmitter.call(this);
}
/**
 * Entry point: runs the master role or the worker role, depending on which
 * side of the cluster fork this process is on.
 */
Rdns.prototype.main = function() {
    if (cluster.isMaster) {
        this._runMaster();
    }
    else {
        this._runWorker();
    }
};

/**
 * Master role: forks the workers, expands the configured range into single
 * IPs and hands out the first batch of lookups. Every reply from a worker is
 * counted and answered with the next queued IP via dispatchResolver().
 * Exits the process with status 1 when the range yields no IPs.
 * @private
 */
Rdns.prototype._runMaster = function() {
    var _this = this;

    this.log("Resolving from ", "c:green underline", this.range,
        " using ", "c:green underline", this.workers, " threads and ",
        "c:green underline", this.concurrentResolves, " concurrent resolves.");

    // Fork before expanding the range: forking is non-blocking but takes a
    // little while, so the workers can start up while the IP list is built.
    for (var i = 0; i < this.workers; i++) {
        cluster.fork();
    }

    // Use the ids cluster actually assigned rather than assuming 1..N.
    var workerIds = Object.keys(cluster.workers);
    workerIds.forEach(function(id) {
        _this.log("c:bgBlue bold", "worker #" + id + ' is online');
        cluster.workers[id].on('message', function(msg) {
            if (!msg || !msg.cmd) {
                return;
            }
            switch (msg.cmd) {
                case 'lookup':
                    // the IP reverse-resolved to a google proxy host name
                    _this._resolved++;
                    _this._proxies.push([msg.data.ip, msg.data.address]);
                    _this.log("c:green", "Resolved ", "c:green bold", msg.data.ip,
                        "c:green", " to ", "c:green bold", msg.data.address,
                        "c:green", " in " + _this.runTime(msg.data.duration));
                    _this.dispatchResolver(id);
                    break;
                case 'skipped':
                    // lookup failed, or no host name matched
                    _this._skipped++;
                    _this.log("c:red", "Skipped ", "c:red bold", msg.data.ip,
                        "c:red", " due to ", "c:red bold", msg.data.err
                    );
                    _this.dispatchResolver(id);
                    break;
            }
        });
    });

    this._ips = this.parseRange(this.range);
    if (!this._ips || !this._ips.length) {
        this.log("Unable to parse range: " + this.range);
        // non-zero status so calling scripts can detect the failure
        process.exit(1);
        return;
    }
    this.log("c:bgGreen bold", 'Found ' + this._ips.length + ' ips');

    // Prime the pipeline: hand out up to concurrentResolves lookups,
    // round-robin across the workers. Later lookups are dispatched one at a
    // time as replies come in.
    var inFlight = 0;
    while (inFlight < this.concurrentResolves && this._ips.length) {
        this.broadcastToWorkers(workerIds[inFlight % workerIds.length], 'lookup', this._ips.shift());
        inFlight++;
    }
};

/**
 * Worker role: performs the lookups requested by the master and relays the
 * resulting 'lookup' / 'skipped' events back to it.
 * @private
 */
Rdns.prototype._runWorker = function() {
    var _this = this;

    // receive messages from master
    process.on('message', function(msg) {
        if (!msg || !msg.cmd) {
            return;
        }
        switch (msg.cmd) {
            case 'lookup':
                _this.lookup(msg.data);
                break;
            case 'shutdown':
                process.disconnect();
                break;
            default:
                _this.log('Invalid msg: ' + msg.cmd + ': ' + JSON.stringify(msg.data));
                break;
        }
    });

    // forward the outcome of every lookup to the master
    this.on('lookup', function(result) {
        _this.broadcastToMaster('lookup', result);
    });
    this.on('skipped', function(result) {
        _this.broadcastToMaster('skipped', result);
    });
};
/**
 * Expands one or more wildcard ranges into the list of individual IPs.
 *
 * Every '*' is replaced by each value from 1 to 255, so
 * parseRange('62.249.9.*') yields 255 addresses and parseRange('62.249.*.*')
 * yields 65025 (255 * 255). Octet value 0 is never generated.
 *
 * Each range is expanded completely on its own, so literal IPs and ranges
 * with any number of wildcards can be mixed freely in one array.
 *
 * @param {string|string[]} ipRange - a range such as '62.249.*.*', or an array of ranges
 * @returns {string[]|null} the unique IPs in default sort() (lexicographic) order,
 *     or null when ipRange is not a non-empty string or an array of such strings
 */
Rdns.prototype.parseRange = function(ipRange) {
    var patterns = Array.isArray(ipRange) ? ipRange : [ipRange];
    var isPattern = function(candidate) {
        return typeof candidate === 'string' && candidate.length > 0;
    };
    if (!patterns.every(isPattern)) {
        return null;
    }

    // A Set removes the duplicates that overlapping ranges produce.
    var expanded = new Set();
    patterns.forEach(function(pattern) {
        // Depth-first expansion: each step replaces the left-most '*' only,
        // so the stack stays small even for multi-wildcard ranges.
        var pending = [pattern];
        while (pending.length) {
            var candidate = pending.pop();
            var offset = candidate.indexOf('*');
            if (offset === -1) {
                expanded.add(candidate);
                continue;
            }
            var before = candidate.slice(0, offset);
            var after = candidate.slice(offset + 1);
            for (var octet = 1; octet < 256; octet++) {
                pending.push(before + octet + after);
            }
        }
    });

    return Array.from(expanded).sort();
};
/**
 * Called by the master once per worker reply. Sends the next queued IP to the
 * worker that just replied, or, once the queue is empty, waits for the
 * lookups that are still in flight and then finishes the run: prints the
 * summary, tells all workers to shut down and saves the results.
 *
 * The initial batch is min(concurrentResolves, total IPs) lookups and every
 * reply is answered with exactly one new lookup while the queue lasts, so
 * that same number of lookups is outstanding when the queue runs dry. The run
 * is finished after that many further replies - and it is finished only once,
 * no matter how many replies arrive afterwards.
 *
 * @param {number|string} id - cluster id of the worker that just replied
 */
Rdns.prototype.dispatchResolver = function(id) {
    // Still work queued: keep the replying worker busy.
    if (this._ips.length) {
        this.broadcastToWorkers(id, 'lookup', this._ips.shift());
        return;
    }

    if (this._finished) {
        return;
    }

    // Computed lazily on the first reply that finds the queue empty.
    if (typeof this._inFlightAtDrain !== 'number') {
        var allIps = this.parseRange(this.range) || [];
        this._inFlightAtDrain = Math.min(this.concurrentResolves, allIps.length);
    }

    if (++this._workersFinished < this._inFlightAtDrain) {
        return; // other lookups are still outstanding
    }

    this._finished = true;
    this.log();
    this.log("c:blue bold", "Resolved " + this._resolved + " ips in " + this.runTime());
    this.broadcastToWorkers(false, 'shutdown');
    this.saveProxies();
};
/**
 * Writes every collected proxy to a text file, one "ip<TAB>hostname" line per
 * entry.
 * File name: for a single range the '*'s are replaced by 'x'
 * (e.g. 66.249.x.x.txt); for a list of ranges it is
 * google-proxies-<dateStamp()>.txt.
 */
Rdns.prototype.saveProxies = function() {
    var target = (typeof this.range == 'object')
        ? 'google-proxies-' + this.dateStamp() + '.txt'
        : this.range.replace(/\*/g, 'x') + '.txt';

    var rows = this._proxies.map(function(pair) {
        return pair[0] + "\t" + pair[1] + "\n";
    });

    fs.writeFileSync(target, rows.join(''), "utf8");
    this.log("Saved to ", "c:green bold", target);
};
/**
 * Reverse-resolves an IP address and reports the outcome as exactly one event:
 *  - 'lookup'  when one of its host names matches this.re; `address` is the
 *              first matching host name
 *  - 'skipped' when the lookup fails ("<code>: <message>"), returns nothing
 *              ('NO_DOMAINS') or returns no matching name ('NO_MATCHES')
 *
 * In both events `duration` carries the start timestamp (ms since epoch) of
 * the lookup, which the receiver can pass to runTime() to get the elapsed time.
 *
 * @param {string} ip - the address to reverse-resolve
 */
Rdns.prototype.lookup = function(ip) {
    var _this = this;
    var startTime = new Date().getTime();
    var skip = function(reason) {
        _this.emit('skipped', {err: reason, ip: ip, duration: startTime});
    };

    try {
        dns.reverse(ip, function(err, domains) {
            if (err) {
                skip(err.code + ": " + err.message);
                return;
            }
            if (!domains) {
                skip('NO_DOMAINS');
                return;
            }

            // An IP can have several reverse names; stop at the first match
            // so that a single IP never produces more than one reply.
            // search() always scans from index 0, unlike exec()/test() on a
            // global regex, which resume from the previous call's lastIndex.
            var proxyHost = null;
            for (var i = 0; i < domains.length && proxyHost === null; i++) {
                if (domains[i].search(_this.re) !== -1) {
                    proxyHost = domains[i];
                }
            }

            if (proxyHost === null) {
                skip('NO_MATCHES');
                return;
            }
            _this.emit('lookup', {err: null, ip: ip, address: proxyHost, duration: startTime});
        });
    } catch (err) {
        // dns.reverse() threw synchronously
        skip(err.code + ": " + err.message);
    }
};
/**
 * Sends a { cmd, data } message to one worker or to all of them.
 * When `id` is truthy and names an existing worker, only that worker is
 * addressed. In every other case - no id, or an id cluster does not know -
 * the message goes to all workers.
 * @param {boolean|number|string} id - cluster worker id, or a falsy value for "everyone"
 * @param {string} cmd - command name
 * @param {Array|Object} data - payload sent along with the command
 * @return {number} how many messages were sent
 */
Rdns.prototype.broadcastToWorkers = function(id, cmd, data){
    var hasTarget = id && typeof cluster.workers[id] !== 'undefined';
    var recipients = hasTarget ? [id] : Object.keys(cluster.workers);

    recipients.forEach(function(workerId) {
        cluster.workers[workerId].send({ cmd: cmd, data: data });
    });

    return recipients.length;
};
/**
 * Sends a { cmd, data } message from a worker to the master process.
 * @param {string} cmd - command name
 * @param {Array|Object} data - payload sent along with the command
 */
Rdns.prototype.broadcastToMaster = function(cmd, data) {
    var message = { cmd: cmd, data: data };
    process.send(message);
};
/**
 * Formats a date as DD-MM-YYYY, based on its UTC (toISOString) representation.
 * @param {Date} [dateObj] - the date to format; defaults to the current time
 * @returns {string}
 */
Rdns.prototype.dateStamp = function(dateObj) {
    var when = dateObj ? dateObj : new Date();
    var isoDay = when.toISOString().split('T')[0];   // "YYYY-MM-DD"
    var parts = isoDay.split('-');
    parts.reverse();                                 // -> DD, MM, YYYY
    return parts.join('-');
};
/**
 * Prints one log line to the console, prefixed with the elapsed run time.
 *
 * Takes any number of arguments and joins them into the log string; objects
 * are rendered with JSON.stringify. A string argument starting with "c:" is a
 * style directive: it is not printed itself, but applies the listed chalk
 * styles to the argument that follows it, e.g.
 *   this.log('c:bgGreen bold', 'This is bold text with a green background');
 * Any number of space separated styles may be given; names chalk does not
 * know are ignored rather than throwing.
 */
Rdns.prototype.log = function() {
    var args = Array.prototype.slice.call(arguments);
    var toText = function(value) {
        return typeof value == 'object' ? JSON.stringify(value) : String(value);
    };

    var msg = '';
    for (var i = 0; i < args.length; i++) {
        var isDirective = typeof args[i] == 'string' && args[i].substr(0, 2) == 'c:';
        if (!isDirective) {
            msg += toText(args[i]);
            continue;
        }

        // Stringify the styled argument by ITS OWN type, so that objects are
        // printed as JSON here as well.
        var styled = toText(args[i + 1]);
        // Chain the styles: chalk.green.bold... for "c:green bold".
        var painter = args[i].substr(2).split(' ').reduce(function(style, name) {
            return (name && typeof style[name] == 'function') ? style[name] : style;
        }, chalk);
        // painter is still chalk itself when no valid style was named.
        msg += (painter === chalk) ? styled : painter(styled);
        i++; // the styled argument has been consumed
    }

    var str = this.runTime() + chalk.grey('> ');
    // Pad by visible width: strip the ANSI escape sequences before measuring.
    var noAnsi = str.replace(/[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g, '');
    var maxLength = 12;
    var padding = Array(maxLength).join(' ');
    console.log(str + padding.substring(0, maxLength - noAnsi.length) + msg);
};
/**
 * Returns the time elapsed since `startTime` as a short, chalk-formatted
 * string such as "0.05s", "1m 1.00s" or "1d 1h 1m 1s".
 * Hundredths of a second are shown only while the duration is below one hour
 * (or while the seconds part is 0 and some milliseconds have elapsed).
 *
 * @param {number} [startTime] - start timestamp in ms since the epoch;
 *     defaults to the time this instance was created
 * @returns {string}
 */
Rdns.prototype.runTime = function(startTime) {
    var since = typeof startTime !== 'undefined' ? startTime : this._startTime;
    var millisecondDiff = Date.now() - since;

    var elapsed = {
        'days' : 0,
        'hours' : 0,
        'mins' : 0,
        'secs' : 0,
        'ms' : millisecondDiff
    };
    if (millisecondDiff > 0) {
        elapsed.ms = Math.floor(millisecondDiff % 1e3);
        var remaining = Math.floor(millisecondDiff / 1e3); // whole seconds
        elapsed.days = Math.floor(remaining / 86400);
        remaining %= 86400;
        elapsed.hours = Math.floor(remaining / 3600);
        remaining %= 3600;
        elapsed.mins = Math.floor(remaining / 60);
        elapsed.secs = remaining % 60;
    }

    var showMs = true;
    var str = '';
    if (elapsed.days > 0) {
        str += chalk.bold(elapsed.days) + 'd ';
        showMs = false;
    }
    if (elapsed.hours > 0) {
        str += chalk.bold(elapsed.hours) + 'h ';
        showMs = false;
    }
    if (elapsed.mins > 0) {
        str += chalk.bold(elapsed.mins) + 'm ';
    }
    if ((elapsed.secs > 0 && showMs) || (elapsed.secs == 0 && elapsed.ms > 0)) {
        // Zero-pad to three digits before taking the hundredths: 5 ms must
        // read ".00" and 50 ms ".05", not ".5" / ".50".
        var hundredths = ('00' + elapsed.ms).slice(-3).substr(0, 2);
        str += chalk.bold(elapsed.secs) + '.' + chalk.bold(hundredths) + 's';
    }
    else {
        str += chalk.bold(elapsed.secs) + 's';
    }
    return str;
};
/**
 * Prints the command line usage, including an example, via log().
 */
Rdns.prototype.usage = function() {
    // One entry per log() call; an empty entry prints a blank line.
    var lines = [
        [],
        ['c:bold', 'Usage: ./dnsresolve.js [range]'],
        [],
        ["Range should be in the format: 62.249.*.*"],
        [],
        ["Examples:"],
        ["c:bold", "./dnsresolve.js 62.249.9.*"],
        []
    ];
    for (var i = 0; i < lines.length; i++) {
        this.log.apply(this, lines[i]);
    }
};
util.inherits(Rdns, EventEmitter);
// if we are being run as a command line app, execute our program
if (process.argv[1] == __filename) {
var rdns = new Rdns();
rdns.main();
}
else {
module.export = Rdns;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment