Skip to content

Instantly share code, notes, and snippets.

@jamlfy
Last active December 26, 2015 04:29
Show Gist options
  • Save jamlfy/7093182 to your computer and use it in GitHub Desktop.
Save jamlfy/7093182 to your computer and use it in GitHub Desktop.
This program reads a folder or a single file, tests the contents against regular expressions, and prints only the email addresses it finds. It is built for large amounts of data.
var fs = require('fs')
var events = require('events')
var util = require('util')
var path = require('path')
/**
 * Event-driven reader for the direct entries of one folder. Entries are
 * queued by `init()` and processed with at most `threads` of them in flight.
 *
 * Events:
 *  - 'file'  (name, data): a regular file was read; `data` is whatever
 *                          fs.readFile produced (no encoding is requested).
 *  - 'dir'   (name):       the entry is not a regular file.
 *  - 'error' (err):        fs.readdir, fs.stat or fs.readFile failed. As with
 *                          any EventEmitter, an 'error' nobody listens for is
 *                          thrown.
 *  - 'done'  ():           the queue is empty and nothing is in flight.
 *  - 'run'   ():           internal trigger that schedules more work.
 *
 * @constructor
 * @param {string} dir - Folder whose direct entries are processed.
 */
var FsPool = function (dir) {
  events.EventEmitter.call(this);
  this.dir = dir;
  this.files = [];   // entry names still waiting to be processed
  this.active = [];  // entry names currently being processed
  this.threads = 1;  // maximum number of entries in flight
  this.on('run', this.runQuta.bind(this));
};

// So will act like an event emitter
util.inherits(FsPool, events.EventEmitter);

/**
 * Processes one queued entry: stats it, reads it when it is a regular file,
 * reports the outcome and then asks the pool to schedule more work.
 *
 * @param {FsPool} pool - Pool that owns the entry.
 * @param {string} name - Entry name relative to `pool.dir`.
 */
function startEntry(pool, name) {
  var fileName = path.join(pool.dir, name);
  pool.active.push(name);

  // Release the slot before reporting, so listeners and the follow-up 'run'
  // observe the pool without this entry.
  function settle(report) {
    pool.active.splice(pool.active.indexOf(name), 1);
    report();
    pool.emit('run');
  }

  fs.stat(fileName, function (statErr, stats) {
    if (statErr) {
      // Throwing here could never be caught by the caller; report instead.
      return settle(function () { pool.emit('error', statErr); });
    }
    if (!stats.isFile()) {
      return settle(function () { pool.emit('dir', name); });
    }
    fs.readFile(fileName, function (readErr, data) {
      if (readErr) {
        return settle(function () { pool.emit('error', readErr); });
      }
      settle(function () { pool.emit('file', name, data); });
    });
  });
}

/**
 * 'run' handler: emits 'done' when there is nothing queued and nothing in
 * flight, otherwise starts queued entries until `threads` are in flight.
 *
 * @returns {boolean|FsPool} The result of emitting 'done' when finished,
 *   otherwise the pool itself.
 */
FsPool.prototype.runQuta = function () {
  if (this.files.length === 0 && this.active.length === 0) {
    return this.emit('done');
  }
  // The queue check keeps an extra 'run' (queue drained, work still in
  // flight) from shifting `undefined` off the queue.
  while (this.active.length < this.threads && this.files.length > 0) {
    startEntry(this, this.files.shift());
  }
  return this;
};

/**
 * Lists `this.dir`, queues its entries and starts processing them.
 * A failing fs.readdir is reported through 'error'; nothing else is emitted
 * in that case.
 *
 * @returns {FsPool} The pool itself, for chaining.
 */
FsPool.prototype.init = function () {
  var self = this;
  fs.readdir(self.dir, function (err, files) {
    if (err) {
      self.emit('error', err);
      return;
    }
    self.files = files;
    self.emit('run');
  });
  return this;
};
// Expose the FsPool constructor as this module's export.
module.exports = FsPool;
#!/usr/bin/env node
// Publish the shared modules on the global object so the rest of the script
// can refer to them by bare name. `global` is used because the upper-case
// `GLOBAL` alias is deprecated in Node.js.
global.os = require('os');
global.fs = require('fs');
// `sys` is the deprecated former name of `util`; the global name is kept but
// the module is loaded under its current name.
global.sys = require('util');
global.util = require('util');
global.path = require('path');
global.pool = require('./pool.js');
global.program = require('commander');
global._ = require('underscore');
global.reader = require('line-reader');
global.childp = require('child_process');
// `os.tmpdir()` is the supported spelling; `os.tmpDir()` is deprecated.
global.tmp = os.tmpdir();

// Mix the underscore.string helpers into underscore itself.
_.str = require('underscore.string');
_.mixin(_.str.exports());
/**
 * Commander coercion callback for `--test`: turns a comma-separated list of
 * pattern sources into RegExp objects.
 *
 * Patterns are compiled with the `gm` flags, like the built-in defaults, so
 * `String.prototype.match` returns every occurrence rather than the first
 * match followed by its capture groups. Empty list entries are skipped.
 * Because the list is split on ",", a pattern cannot itself contain a comma.
 *
 * @param {string} val - Comma-separated regular expression sources.
 * @returns {RegExp[]} One RegExp per non-empty entry, in the order given.
 * @throws {SyntaxError} If an entry is not a valid regular expression.
 */
function parceList(val) {
  return val
    .split(',')
    .filter(function (source) { return source.length > 0; })
    .map(function (source) { return new RegExp(source, 'gm'); });
}

program
  .option('-i, --in <path>', 'Path with in')
  .option('-r, --recursive', 'Recursive')
  .option('-e, --encoding <encoding>', 'encoding')
  .option('-t, --test <test1,test2...>', 'Test in RegExp', parceList);
program.parse(process.argv);

// Folder or file to read
const inner = path.resolve(program.in ? program.in : './data.txt');

// Regular expressions to test
const exp = program.test || [
  /[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}/gm,
  // Same address shape with "@" written as the HTML character reference
  // "&#64;". The ";" is optional so both "&#64" and "&#64;" are recognised.
  /[a-zA-Z0-9._-]+&#64;?[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}/gm,
  // /^[Form]/
];

/**
 * Prints every distinct match of the configured expressions found in `text`,
 * one per line, last match first.
 *
 * @param {string} text - Text to scan.
 */
function printMatches(text) {
  let found = [];
  exp.forEach(function (reg) {
    found = _.union(found, text.match(reg) || []);
  });
  // A single match must be printed too, hence no minimum-count check.
  for (let i = found.length - 1; i >= 0; i--) {
    console.log(found[i]);
  }
}

if (program.recursive) {
  // Folder mode: scan every entry of the folder through the pool.
  const fsPool = new pool(inner);
  fsPool.on('file', function (fileName, fileData) {
    console.log('file name: ' + fileName);
    // Honour --encoding when given; UTF-8 otherwise.
    printMatches(fileData.toString(program.encoding || 'utf8'));
  });
  fsPool.on('dir', function (dirName) {
    console.log('dir name: ' + dirName);
  });
  fsPool.on('done', function () {
    console.log('done Ok');
  });
  fsPool.init();
} else {
  // Classic execution: scan a single file line by line.
  console.log('file name: ' + inner);
  reader.eachLine(inner, function (line, last) {
    printMatches(line);
    if (last) {
      console.log('done Ok');
      process.exit();
    }
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment