Skip to content

Instantly share code, notes, and snippets.

@jimmont
Created May 23, 2014 17:39
Show Gist options
  • Save jimmont/ef92cd72283bcf6f90cf to your computer and use it in GitHub Desktop.
Save jimmont/ef92cd72283bcf6f90cf to your computer and use it in GitHub Desktop.
check if a file is text based on its content
// Detect whether `file` looks like a text file (similar in spirit to Perl's
// `-T` file test) and, if it does, search its full contents.
//
// How it works:
//   1. Read up to the first 100 bytes of the file.
//   2. Count how many decoded characters belong to a set that is typical of
//      text (letters, digits, whitespace, common punctuation).
//   3. If more than 60% of the sampled bytes are such characters, read the
//      whole file as UTF-8 and run `data.match(options.search)` on it,
//      logging any matches.
//
// Relies on names defined outside this snippet: `fs` (Node's fs module),
// `file` (path to inspect) and `options.search` (presumably a RegExp or
// string pattern; confirm against the caller).
//
// Errors are deliberately ignored (best effort): a file that cannot be opened
// or read is simply skipped.
fs.open(file, 'r', function(err, fd){
	if(err) return;
	var len = 100;
	// Buffer.alloc gives zero-filled memory; `new Buffer(n)` is deprecated and
	// returns uninitialised memory.
	var buf = Buffer.alloc(len);
	var re = /[.a-z0-9(){}\[\]? "':;<>?\s\n\r\t "':;#@~$%*<>.,_=+-]/ig;
	fs.read(fd, buf, 0, len, 0, function(err, num, buf){
		// The sample is all that is needed from this descriptor, so release it
		// on every path (including read errors and empty files) to avoid
		// leaking it. The callback is required by fs.close; a failure to close
		// is ignored, in keeping with the best-effort handling used here.
		fs.close(fd, function(){});
		if(err || num < 1) return;
		var str = buf.toString('utf-8', 0, num);
		// do we have enough of the types of characters typical of a text file?
		var charCount = (str.match(re) || []).length;
		if(charCount/num > 0.6){
			// probably a text file
			fs.readFile(file, 'utf8', function(err, data){
				if(err) return;
				var results = data.match(options.search);
				if(results){
					console.log('found in (', (charCount/num), ')',file,results.length);
					console.log(results);
				}
			});
		}
	});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment