Skip to content

Instantly share code, notes, and snippets.

@ben8p
Created December 16, 2016 11:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ben8p/e3b958ce082c87c67f9c9bb785645f67 to your computer and use it in GitHub Desktop.
Save ben8p/e3b958ce082c87c67f9c9bb785645f67 to your computer and use it in GitHub Desktop.
CSV parser
/**
* CSV Parser. Takes a string as input and parses it into
* an array of arrays (one array per row), exposed as the `data` property.
*
* @param data String, CSV input
* @param options.delimiter String, single character used to separate fields.
* Defaults to null; if null, the parser tries to guess it.
* @param options.quote String, single character used to quote non-simple fields.
* Defaults to "\"".
*/
class CSVParser {
  /**
   * Parses `data` and stores the result on `this.data` as an array of rows,
   * each row being an array of field strings. The delimiter and quote
   * actually used are exposed as `this.delimiter` and `this.quote`.
   *
   * Empty, null or undefined input produces `[['']]`. A trailing line break
   * in the input produces a final row holding one empty field.
   *
   * @param {string} data CSV input.
   * @param {Object} [options]
   * @param {?string} [options.delimiter] Single character separating fields.
   *   When falsy, the delimiter is guessed from the input.
   * @param {?string} [options.quote] Single character used to quote fields.
   *   Defaults to `"` when missing or null.
   */
  constructor(data, options = {delimiter: null, quote: '"'}) {
    const settings = options || {};
    // Coerce up front so null/undefined input cannot crash the delimiter guess.
    const text = data ? String(data) : '';
    this.delimiter = settings.delimiter || this._guessDelimiter(text);
    // A partial options object (e.g. only `delimiter`) must still get the default quote.
    this.quote = settings.quote == null ? '"' : settings.quote;
    if(text === '') {
      this.data = [['']];
      return;
    }
    this.data = this._parse(text.split(''));
  }

  /**
   * Appends one character to the field buffer.
   * A quote character that directly follows a quote already at the end of
   * the buffer is dropped (tolerance for stray, unescaped quote runs).
   *
   * @param {string[]} stream Characters of the field being built.
   * @param {string} c Character to append.
   */
  _addToStream(stream, c) {
    if(c === this.quote && stream[stream.length - 1] === c) { return; }
    stream.push(c);
  }

  /**
   * Flushes the field buffer into `row` as one string and empties the buffer.
   *
   * @param {string[]} row Row receiving the finished field.
   * @param {string[]} stream Characters of the finished field.
   */
  _addStream(row, stream) {
    row.push(stream.join(''));
    stream.length = 0;
  }

  /**
   * Finishes the current field and row, appends the row to `rows`.
   *
   * @param {string[][]} rows Rows parsed so far.
   * @param {string[]} row Row being finished.
   * @param {string[]} stream Characters of the row's last field.
   * @returns {string[]} A fresh, empty row to continue parsing with.
   */
  _addRow(rows, row, stream) {
    this._addStream(row, stream);
    rows.push(row);
    return [];
  }

  /**
   * Tells whether a quote character that was just consumed closes the quoted
   * field, judging by what follows it in `data`.
   *
   * @param {string[]} data Remaining input characters (not modified).
   * @param {boolean|undefined} lastColumn true: only a line break closes the
   *   field; false: only the delimiter does; undefined: either does.
   * @returns {boolean} true when the quote ends the field.
   */
  _isClosingQuote(data, lastColumn) {
    const next = data[0];
    if(next === undefined) { return true; }
    const atDelimiter = next === this.delimiter;
    // Accept CRLF as well as LF, otherwise `"x"\r\n` never closes the field.
    const atLineBreak = next === '\n' || (next === '\r' && data[1] === '\n');
    if(lastColumn === true) { return atLineBreak; }
    if(lastColumn === false) { return atDelimiter; }
    if(lastColumn === undefined) { return atDelimiter || atLineBreak; }
    return false;
  }

  /**
   * Reads the content of a quoted field. Expects the opening quote to have
   * been consumed already; consumes characters from the front of `data` up
   * to and including the closing quote (or to the end of input when the
   * field is never closed) and appends the content to `stream`.
   *
   * @param {string[]} data Remaining input characters; consumed in place.
   * @param {string[]} stream Field buffer receiving the content.
   * @param {boolean|undefined} lastColumn See `_isClosingQuote`.
   */
  _extractContent(data, stream, lastColumn) {
    let c;
    while((c = data.shift()) !== undefined) {
      // Drop the CR of a CRLF pair; the LF that follows is kept as content.
      if(c === '\r' && data[0] === '\n') { continue; }
      if(c === this.quote) {
        if(data[0] === this.quote) {
          // Doubled quote = one literal quote. Consume the pair explicitly
          // so that several escaped quotes in a row are all preserved.
          data.shift();
          stream.push(c);
          continue;
        }
        if(this._isClosingQuote(data, lastColumn)) { return; }
      }
      this._addToStream(stream, c);
    }
  }

  /**
   * Parses the input characters into rows of fields.
   *
   * @param {string[]} data Input split into single characters; consumed in place.
   * @returns {string[][]} Parsed rows.
   */
  _parse(data) {
    const rows = [];
    const stream = [];
    let row = [];
    let c;
    while((c = data.shift()) !== undefined) {
      // Drop the CR of a CRLF pair; the LF that follows ends the row.
      if(c === '\r' && data[0] === '\n') { continue; }
      if(c === this.delimiter) {
        this._addStream(row, stream);
      } else if(c === '\n') {
        row = this._addRow(rows, row, stream);
      } else if(c === this.quote) {
        // `row` holds the fields already completed, so the field being read
        // is the last one when exactly one field of the first row's width
        // is still missing. Unknown (undefined) while reading the first row.
        const lastColumn = rows[0] ? rows[0].length - 1 === row.length : undefined;
        this._extractContent(data, stream, lastColumn);
      } else {
        this._addToStream(stream, c);
      }
    }
    this._addRow(rows, row, stream);
    return rows;
  }

  /**
   * Guesses the delimiter by counting, over roughly the first 10% of the
   * lines, how many pieces each candidate character splits the lines into.
   * The candidate with the highest total wins; on a tie (including when no
   * candidate occurs at all) the candidate listed last wins.
   *
   * @param {string} data CSV input.
   * @returns {string} The guessed delimiter.
   */
  _guessDelimiter(data) {
    const possibleDelimiters = [',', ';', '^', '$', '.', '|', '\t'];
    const totals = possibleDelimiters.map(() => 0);
    const lines = String(data == null ? '' : data).split(/\n|\r/);
    // Only analyse 10% of the lines. It should be enough.
    const sampleEnd = lines.length * 0.1;
    for(let index = 0; index < lines.length; index++) {
      const line = lines[index];
      possibleDelimiters.forEach((candidate, position) => {
        totals[position] += line.split(candidate).length;
      });
      if(index >= sampleEnd) { break; }
    }
    let best = 0;
    totals.forEach((total, position) => {
      // `>=` so that a later candidate wins a tie.
      if(total >= totals[best]) { best = position; }
    });
    return possibleDelimiters[best];
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment