Skip to content

Instantly share code, notes, and snippets.

@soheilpro
Last active August 29, 2015 14:22
Show Gist options
  • Save soheilpro/dff033fcfd62ea0ad6d5 to your computer and use it in GitHub Desktop.
Save soheilpro/dff033fcfd62ea0ad6d5 to your computer and use it in GitHub Desktop.
CSV Parser in JS
/**
 * Token that carries the text of one field.
 *
 * @constructor
 * @param {*} value - Field content; stored unchanged on the `value` property.
 */
function ValueToken(value) {
  this.value = value;
}
/**
 * Marker token for a ',' separator. It carries no data; consumers
 * recognise it with `instanceof`.
 *
 * @constructor
 */
function CommaToken() {}
/**
 * Marker token for a line break. It carries no data; consumers
 * recognise it with `instanceof`.
 *
 * @constructor
 */
function NewLineToken() {}
/**
 * Marker token signalling that the input has been fully consumed.
 * It carries no data; consumers recognise it with `instanceof`.
 *
 * @constructor
 */
function EOFToken() {}
/**
 * Splits CSV text into tokens that are handed out one at a time by
 * nextToken().
 *
 * @constructor
 * @param {string} data - Complete CSV text to tokenize.
 */
function Tokenizer(data) {
  this.data = data;
  // Offset into `data` of the first character that has not been consumed yet.
  this.currentIndex = 0;
}

/**
 * Consumes the next token from the input and advances `currentIndex` past it.
 *
 * Recognised tokens:
 *  - ','            -> CommaToken
 *  - '\n' or '\r\n' -> NewLineToken
 *  - '"..."'        -> ValueToken; the text may contain commas and line
 *                      breaks, and a doubled quote ("") stands for one
 *                      literal quote character
 *  - anything else  -> ValueToken holding the text up to (not including)
 *                      the next comma, line break or end of input
 *  - end of input   -> EOFToken (returned again on every further call)
 *
 * @returns {ValueToken|CommaToken|NewLineToken|EOFToken} The next token.
 * @throws {string} 'Unclosed quoted value.' when a quoted value has no
 *     closing quote. A plain string (not an Error) is thrown on purpose so
 *     the existing exception contract stays unchanged.
 */
Tokenizer.prototype.nextToken = function () {
  var data = this.data;
  var start = this.currentIndex;

  if (start === data.length) {
    return new EOFToken();
  }

  var nextChar = data.charAt(start);

  // Comma
  if (nextChar === ',') {
    this.currentIndex = start + 1;
    return new CommaToken();
  }

  // New line (LF)
  if (nextChar === '\n') {
    this.currentIndex = start + 1;
    return new NewLineToken();
  }

  // New line (CRLF): consume both characters as a single line break so the
  // '\r' does not leak into the preceding or following value.
  if (nextChar === '\r' && data.charAt(start + 1) === '\n') {
    this.currentIndex = start + 2;
    return new NewLineToken();
  }

  // Quoted value
  if (nextChar === '"') {
    var text = '';
    var scanFrom = start + 1;

    while (true) {
      var quoteIndex = data.indexOf('"', scanFrom);

      if (quoteIndex === -1) {
        throw 'Unclosed quoted value.';
      }

      text += data.substring(scanFrom, quoteIndex);

      // A quote that is not immediately followed by another quote closes
      // the value.
      if (data.charAt(quoteIndex + 1) !== '"') {
        this.currentIndex = quoteIndex + 1;
        return new ValueToken(text);
      }

      // Doubled quote: keep one literal quote and continue after the pair.
      text += '"';
      scanFrom = quoteIndex + 2;
    }
  }

  // Unquoted value: runs up to the next delimiter. Searching from
  // `lastIndex` avoids copying the remaining input for every token.
  var delimiter = /,|\r?\n/g;
  delimiter.lastIndex = start;
  var match = delimiter.exec(data);
  var end = match === null ? data.length : match.index;

  this.currentIndex = end;
  return new ValueToken(data.substring(start, end));
};
/**
 * Parses CSV text into rows of field values.
 *
 * Each line of input becomes one array of strings. Empty fields are kept as
 * empty strings, so 'a,,b' yields ['a', '', 'b'] and 'a,' yields ['a', ''].
 * A line with no tokens at all yields an empty array; in particular, the
 * empty input yields [[]] and input ending in a line break ends with [].
 *
 * @param {string} data - CSV text to parse.
 * @returns {Array<Array<string>>} One array of field values per line.
 * @throws {string} 'Unsupported token' if the tokenizer returns an object of
 *     an unknown kind; errors thrown by the tokenizer propagate unchanged.
 */
function parse(data) {
  var tokenizer = new Tokenizer(data);
  var lines = [];
  var line = [];
  // Kind of the previous token on the current line: 'start' (none yet),
  // 'value' or 'comma'. Needed to detect fields that have no value token.
  var previous = 'start';
  var token;

  while (true) {
    token = tokenizer.nextToken();

    if (token instanceof ValueToken) {
      line.push(token.value);
      previous = 'value';
      continue;
    }

    if (token instanceof CommaToken) {
      // A comma at line start or right after another comma closes an
      // empty field.
      if (previous !== 'value') {
        line.push('');
      }
      previous = 'comma';
      continue;
    }

    if (token instanceof NewLineToken || token instanceof EOFToken) {
      // A trailing comma opens a final field that never received a value.
      if (previous === 'comma') {
        line.push('');
      }
      lines.push(line);

      if (token instanceof EOFToken) {
        return lines;
      }

      line = [];
      previous = 'start';
      continue;
    }

    throw 'Unsupported token';
  }
}
// Demo: parse a small sample (a header row followed by two records, the last
// of which has a quoted field containing an embedded line break) and print
// the resulting array of rows.
var input = 'artist,album,year\n"Dream Theater",Octavarium,2005\n"Pink Floyd","The\nWall",1979';
console.log(parse(input));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment