Skip to content

Instantly share code, notes, and snippets.

@cecilemuller
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cecilemuller/2be9771b1c6d50d83884 to your computer and use it in GitHub Desktop.
Save cecilemuller/2be9771b1c6d50d83884 to your computer and use it in GitHub Desktop.
Parse a large UTF-8 text file line by line, asynchronously (e.g. to save each line to a database before continuing with the next one)
/* global console*/
/* global require*/
// Number of lines handed to the per-line handler so far.
var count = 0;
var parse = require('./parser');

/**
 * Per-line handler: logs the line, bumps the counter, then signals
 * that the next line may be delivered.
 *
 * @param {string} line - The line passed in by the parser.
 * @param {function} next - Call once this line has been fully handled.
 */
function handleLine(line, next){
    'use strict';
    console.log('[LINE] ' + line);
    count += 1;
    // Any asynchronous work (e.g. a database write) would go here;
    // `next` must only be called once that work has completed.
    next();
}

/**
 * Completion handler: reports either the failure or the total line count.
 *
 * @param {Error} [err] - Set when parsing failed.
 */
function reportOutcome(err){
    'use strict';
    if (!err){
        console.log('FINISHED: there were ' + count + ' lines.');
        return;
    }
    console.error('FAILED: ' + err.message);
}

parse('a_big_text_file.txt', handleLine, reportOutcome);
/* global module */
/* global require */
var fs = require('fs');
var async = require('async');
module.exports = function(filepath, cb, done){
'use strict';
var stream = fs.createReadStream(
filepath,
{
flags: 'r',
encoding: 'utf-8'
}
);
var buffer = '';
var process_buffer = function(is_last_chunk, done_for_now){
buffer = buffer.replace('\r', '');
var lines = buffer.split('\n');
if (!is_last_chunk){
buffer = lines.pop();
}
async.eachSeries(lines, cb, done_for_now);
};
stream
.on('data', function(chunk){
stream.pause();
buffer += chunk;
process_buffer(false, function(){
stream.resume();
});
})
.on('error', done)
.on('end', function(){
if (buffer !== ''){
process_buffer(true, function(){
done();
});
} else {
done();
}
});
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment