|
// Parses a CSV file, line by line.
// If options.hasHeader is set, the first line supplies the column names and the callback
// is called, for each following line, with an object whose keys are the header names.
// Otherwise the callback is called, for each line, with an array of the extracted values;
// options.fieldIndexes can restrict (and reorder) the fields put in that array.
//
// Made to run on the Node.js platform.
// TODO: accept more encodings, using a conversion library.
|
|
|
var fs = require('fs'); |
|
|
|
// http://blog.james-carr.org/2010/07/07/parsing-csv-files-with-nodejs/ |
|
// https://gist.github.com/PhiLhoSoft/ff60eefcb8ed43326cd7 |
|
/**
 * Streams a CSV file and reports every non-empty line to a callback.
 *
 * Lines are separated by any run of CR / LF characters, so line breaks inside
 * quoted fields are not supported. Fields are separated by commas; a field
 * wrapped in double quotes may contain commas, and a doubled quote ("") inside
 * it stands for one literal quote.
 *
 * @param {string} fileName - Path of the CSV file to read.
 * @param {Object} [options]
 * @param {boolean} [options.hasHeader] - When truthy, the first non-empty line
 *     gives the column names and every following line is reported as an object
 *     keyed by those names. Columns with an empty name, and fields beyond the
 *     last header column, are left out.
 * @param {number[]} [options.fieldIndexes] - Only used without hasHeader:
 *     zero-based indexes of the fields to keep. Each kept value is stored at
 *     the position its index has in this array.
 * @param {Object|string} [options.readOptions] - Passed as is to
 *     fs.createReadStream. Content is decoded as UTF-8 unless an encoding is
 *     given here.
 * @param {function(Object|string[])} onNext - Called once per data line.
 * @param {function(Error=)} [onComplete] - Called without argument when the
 *     whole file has been processed, or with the error when reading fails.
 *     Without this callback a read error is thrown, as an unhandled stream
 *     'error' event would be.
 */
function parseCsvFile(fileName, options, onNext, onComplete)
{
    var opts = options || {};
    var readOptions = opts.readOptions;
    var header = null; // Column names, once the header line has been read
    var buffer = '';   // Trailing, possibly incomplete, line of the previous chunk
    var sawContent = false;

    var stream = fs.createReadStream(fileName, readOptions);

    // Let the stream do the decoding: unlike a per-chunk toString(), it keeps
    // a multi-byte character that straddles two chunks in one piece.
    var hasEncoding = typeof readOptions === 'string' ||
            (readOptions != null && readOptions.encoding != null);
    if (!hasEncoding)
    {
        stream.setEncoding('utf8');
    }

    stream.on('data', function (chunk)
    {
        var text = typeof chunk === 'string' ? chunk : chunk.toString();
        if (!sawContent && text.length > 0)
        {
            sawContent = true;
            // Drop a leading byte order mark, it is not part of the first field
            if (text.charCodeAt(0) === 0xFEFF)
            {
                text = text.slice(1);
            }
        }

        // Add the chunk to the remainder of the previous one, and cut in lines
        var lines = (buffer + text).split(/[\r\n]+/);
        // The last piece can be a partial line: keep it for the next chunk
        buffer = lines.pop();
        for (var i = 0; i < lines.length; i++)
        {
            processLine(lines[i]);
        }
    });

    stream.on('end', function ()
    {
        // Whatever remains is the last line of a file without final line break
        processLine(buffer);
        buffer = '';
        if (onComplete)
        {
            onComplete();
        }
    });

    stream.on('error', function (err)
    {
        if (!onComplete)
        {
            throw err;
        }
        onComplete(err);
    });

    // Dispatches one line: header capture, record building or field extraction.
    function processLine(line)
    {
        if (line === '')
            return; // Skip empty lines

        var values = splitFields(line);
        if (!opts.hasHeader)
        {
            onNext(extractFields(values));
        }
        else if (header === null)
        {
            // First non-empty line of the file, wherever it lies in its chunk
            header = values;
        }
        else
        {
            onNext(buildRecord(values));
        }
    }

    // Maps the values of a line to the column names given by the header.
    function buildRecord(values)
    {
        var record = {};
        values.forEach(function (value, index)
        {
            var name = header[index];
            if (name !== undefined && name !== '')
            {
                record[name] = value;
            }
        });
        return record;
    }

    // Returns all the values, or only those selected by options.fieldIndexes.
    function extractFields(values)
    {
        if (opts.fieldIndexes == null)
            return values;

        var fields = [];
        values.forEach(function (value, index)
        {
            var slot = opts.fieldIndexes.indexOf(index);
            if (slot !== -1)
            {
                fields[slot] = value;
            }
        });
        return fields;
    }

    // Cuts a line into its field values, with the quoting removed.
    function splitFields(line)
    {
        var fields = [];
        var pos = 0;
        // <= because a line ending with a comma has a last, empty, field
        while (pos <= line.length)
        {
            var field = line.charAt(pos) === '"' ? readQuotedField(line, pos) : null;
            if (field === null)
            {
                field = readBareField(line, pos);
            }
            fields.push(field.value);
            pos = field.next;
        }
        return fields;
    }

    // Reads the field starting at 'start' up to the next comma. Quote
    // characters found in such a field are removed.
    function readBareField(line, start)
    {
        var comma = line.indexOf(',', start);
        var end = comma === -1 ? line.length : comma;
        return { value: line.slice(start, end).replace(/"/g, ''), next: end + 1 };
    }

    // Reads the quoted field whose opening quote is at 'start'. Returns null
    // if the quote is never closed, so the caller can fall back to a bare field.
    function readQuotedField(line, start)
    {
        var value = '';
        var from = start + 1;
        while (from < line.length)
        {
            var quote = line.indexOf('"', from);
            if (quote === -1)
                return null;

            value += line.slice(from, quote);
            if (line.charAt(quote + 1) === '"')
            {
                // Doubled quote: a literal quote inside the field
                value += '"';
                from = quote + 2;
            }
            else
            {
                // Closing quote: anything between it and the next comma is ignored
                var comma = line.indexOf(',', quote + 1);
                var end = comma === -1 ? line.length : comma;
                return { value: value, next: end + 1 };
            }
        }
        return null;
    }
}
|
|
|
module.exports = parseCsvFile; |