Last active
December 26, 2015 04:29
-
-
Save jamlfy/7093182 to your computer and use it in GitHub Desktop.
This program reads a folder or a single file, tests the contents against regular expressions, and prints only the e-mail addresses it finds. It is built for large amounts of data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs') | |
var events = require('events') | |
var util = require('util') | |
var path = require('path') | |
/**
 * Walks the entries of a single directory and reports each one through events.
 *
 * Events emitted by an instance:
 *   'file'  (name, data) - the entry is a regular file; `data` is whatever
 *                          fs.readFile passed to its callback (no encoding is requested).
 *   'dir'   (name)       - the entry is anything that is not a regular file.
 *   'error' (err, name)  - fs.readdir, fs.stat or fs.readFile failed; `name` is the
 *                          entry (or the directory itself for a readdir failure).
 *   'done'  ()           - the queue is empty and nothing is in flight.
 *   'run'   ()           - scheduling tick; handled by runQuta().
 *
 * @param {string} dir - Directory whose entries are processed.
 * @param {number} [threads=1] - Maximum number of entries handled at the same time.
 *   Values that are missing or smaller than 1 fall back to 1.
 */
var FsPool = function (dir, threads) {
  events.EventEmitter.call(this);
  this.dir = dir;
  this.files = [];   // entry names still waiting to be processed
  this.active = [];  // entry names currently being processed
  this.threads = threads >= 1 ? Math.floor(threads) : 1;
  this.on('run', this.runQuta.bind(this));
};

// So will act like an event emitter
util.inherits(FsPool, events.EventEmitter);

// Removes `name` from the in-flight list; a name that is not present is ignored
// (splice(-1, 1) would otherwise drop an unrelated entry).
function releaseEntry(pool, name) {
  var index = pool.active.indexOf(name);
  if (index !== -1) {
    pool.active.splice(index, 1);
  }
}

// Frees the slot held by `name`, reports the failure and keeps the queue moving.
// With no 'error' listener attached, EventEmitter throws `err`, as the old `throw err` did.
function failEntry(pool, name, err) {
  releaseEntry(pool, name);
  pool.emit('error', err, name);
  pool.emit('run');
}

// Stats one entry and emits 'file' (after reading it) or 'dir', then schedules the next tick.
function processEntry(pool, name) {
  var fileName = path.join(pool.dir, name);

  fs.stat(fileName, function (err, stats) {
    if (err) {
      return failEntry(pool, name, err);
    }

    if (!stats.isFile()) {
      releaseEntry(pool, name);
      pool.emit('dir', name);
      pool.emit('run');
      return;
    }

    fs.readFile(fileName, function (err, data) {
      if (err) {
        return failEntry(pool, name, err);
      }
      releaseEntry(pool, name);
      pool.emit('file', name, data);
      pool.emit('run');
    });
  });
}

/**
 * Scheduling step, invoked on every 'run' event.
 *
 * Emits 'done' when there is nothing queued and nothing in flight; otherwise
 * starts queued entries until `threads` of them are active or the queue is empty.
 *
 * @returns {boolean|FsPool} The result of emit('done') when finished, otherwise `this`.
 */
FsPool.prototype.runQuta = function () {
  if (this.files.length === 0 && this.active.length === 0) {
    return this.emit('done');
  }

  // Fill every free slot, but never shift() from an empty queue: that would
  // hand `undefined` to path.join and throw.
  while (this.active.length < this.threads && this.files.length > 0) {
    var name = this.files.shift();
    this.active.push(name);
    processEntry(this, name);
  }

  return this;
};

/**
 * Lists `this.dir` and starts processing its entries.
 * A readdir failure is reported through the 'error' event.
 *
 * @returns {FsPool} `this`, for chaining.
 */
FsPool.prototype.init = function () {
  var self = this;

  fs.readdir(this.dir, function (err, files) {
    if (err) {
      self.emit('error', err, self.dir);
      return;
    }
    self.files = files;
    self.emit('run');
  });

  return this;
};
module.exports = FsPool; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// Shared modules are published on the global object because the rest of this
// script refers to them by bare name (`path`, `program`, `_`, `reader`, ...).
// `global` is used instead of the `GLOBAL` alias, which Node.js deprecated
// (DEP0016) and later removed.
global.os = require('os');
global.fs = require('fs');
// 'sys' is only a deprecated alias of 'util' (DEP0025), so load 'util' directly.
global.sys = require('util');
global.util = require('util');
global.path = require('path');
global.pool = require('./pool.js');
global.program = require('commander');
global._ = require('underscore');
global.reader = require('line-reader');
global.childp = require('child_process');
// os.tmpDir() was deprecated (DEP0022) in favour of os.tmpdir().
global.tmp = os.tmpdir();

// Mix the underscore.string helpers into underscore itself.
_.str = require('underscore.string');
_.mixin(_.str.exports());
/**
 * Converts the value of --test (comma-separated pattern sources) into RegExp objects.
 *
 * Every pattern is compiled with the `g` and `m` flags, like the built-in default,
 * so String#match returns all matches instead of the first match plus its groups.
 * Empty pieces (e.g. from a trailing comma) are skipped.
 * Note: because the list is split on ',', a pattern cannot itself contain a comma.
 *
 * @param {string} val - Raw option value, e.g. "foo\\d+,bar".
 * @returns {RegExp[]} One RegExp per non-empty piece, in the order given.
 * @throws {SyntaxError} If a piece is not a valid regular expression.
 */
function parceList(val) {
  return val
    .split(',')
    .filter(function (source) {
      return source.length > 0;
    })
    .map(function (source) {
      return new RegExp(source, 'gm');
    });
}

/**
 * Prints, one per line, every distinct match of the configured expressions in `text`.
 *
 * @param {string} text - Text to scan (a whole file or a single line).
 */
function printMatches(text) {
  var found = [];
  exp.forEach(function (reg) {
    found = _.union(found, text.match(reg) || []);
  });
  // Printed last-to-first, the order this script has always used.
  for (var i = found.length - 1; i >= 0; i--) {
    console.log(found[i]);
  }
}

program
  .option('-i, --in <path>', 'Path with in')
  .option('-r, --recursive', 'Recursive')
  .option('-e, --encoding <encoding>', 'encoding')
  .option('-t, --test <test1,test2...>', 'Test in RegExp', parceList);

program.parse(process.argv);

// Folder or file to read
const inner = path.resolve(program.in ? program.in : './data.txt');

// Regular expressions to test (defaults to a simple e-mail address pattern)
const exp = program.test || [
  /[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}/gm
];

// Encoding used to decode whole files; unknown or missing values fall back to UTF-8.
const encoding = Buffer.isEncoding(program.encoding) ? program.encoding : 'utf8';

if (program.recursive) {
  // Folder mode: every entry of `inner` is handled through the pool.
  var fsPool = new pool(inner);

  fsPool.on('file', function (fileName, fileData) {
    console.log('file name: ' + fileName);
    printMatches(fileData.toString(encoding));
  });

  fsPool.on('dir', function (dirName) {
    console.log('dir name: ' + dirName);
  });

  // If the pool reports a failure through an 'error' event, log it and flag
  // the exit code instead of letting the emitter throw.
  fsPool.on('error', function (err) {
    console.error(err && err.message ? err.message : err);
    process.exitCode = 1;
  });

  fsPool.on('done', function () {
    console.log('done Ok');
  });

  fsPool.init();
} else {
  // Classic execution: a single file, scanned line by line.
  console.log('file name: ' + inner);

  reader.eachLine(inner, function (line, last) {
    printMatches(line);

    if (last) {
      console.log('done Ok');
      process.exit();
    }
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment