Last active
August 29, 2015 14:22
-
-
Save soheilpro/dff033fcfd62ea0ad6d5 to your computer and use it in GitHub Desktop.
CSV Parser in JS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Token that carries the content of one CSV field.
 *
 * @constructor
 * @param {*} value - Field content; stored unchanged on `this.value`.
 */
function ValueToken(value) {
  var token = this;
  token.value = value;
}
/**
 * Marker token for a field separator (','). Carries no data; consumers
 * identify it with `instanceof`.
 *
 * @constructor
 */
function CommaToken() {}
/**
 * Marker token for a line break. Carries no data; consumers identify it
 * with `instanceof`.
 *
 * @constructor
 */
function NewLineToken() {}
/**
 * Marker token for the end of the input. Carries no data; consumers
 * identify it with `instanceof`.
 *
 * @constructor
 */
function EOFToken() {}
/**
 * Cursor over CSV text. Holds the input and the position of the next
 * character that has not been consumed yet.
 *
 * @constructor
 * @param {string} data - The text to tokenize; stored unchanged on `this.data`.
 */
function Tokenizer(data) {
  var self = this;

  self.data = data;
  // Reading starts at the first character of the input.
  self.currentIndex = 0;
}
/**
 * Reads the next token at `this.currentIndex` and moves the cursor past it.
 *
 * Tokens produced:
 *  - EOFToken     when the cursor is at (or beyond) the end of the input;
 *                 every later call returns a new EOFToken as well.
 *  - CommaToken   for ','.
 *  - NewLineToken for '\n' or for the pair '\r\n'.
 *  - ValueToken   for a double-quoted value. The surrounding quotes are
 *                 removed, a doubled quote ("") inside stands for one literal
 *                 quote, and commas / line breaks inside are kept verbatim.
 *  - ValueToken   for an unquoted run of characters up to (not including)
 *                 the next ',', '\n' or '\r\n', or up to the end of input.
 *
 * @returns {ValueToken|CommaToken|NewLineToken|EOFToken} The token read.
 * @throws {string} 'Unclosed quoted value.' when a quoted value has no
 *     closing quote. (A string, not an Error, is thrown so existing callers
 *     that compare the thrown value keep working.)
 */
Tokenizer.prototype.nextToken = function() {
  var data = this.data;
  var start = this.currentIndex;

  if (start >= data.length)
    return new EOFToken();

  var nextChar = data.charAt(start);

  // Comma
  if (nextChar === ',') {
    this.currentIndex = start + 1;
    return new CommaToken();
  }

  // New line (LF)
  if (nextChar === '\n') {
    this.currentIndex = start + 1;
    return new NewLineToken();
  }

  // New line (CRLF): consume both characters so no '\r' leaks into a value.
  if (nextChar === '\r' && data.charAt(start + 1) === '\n') {
    this.currentIndex = start + 2;
    return new NewLineToken();
  }

  // Quoted value
  if (nextChar === '"') {
    var quoted = '';
    var segmentStart = start + 1;

    while (true) {
      var quoteIndex = data.indexOf('"', segmentStart);

      if (quoteIndex === -1)
        throw 'Unclosed quoted value.';

      quoted += data.substring(segmentStart, quoteIndex);

      // A quote immediately followed by another quote is an escaped quote,
      // not the end of the value: keep one '"' and continue scanning.
      if (data.charAt(quoteIndex + 1) === '"') {
        quoted += '"';
        segmentStart = quoteIndex + 2;
        continue;
      }

      this.currentIndex = quoteIndex + 1;
      return new ValueToken(quoted);
    }
  }

  // Unquoted value: search in place from the cursor (via lastIndex) instead
  // of copying the remainder of the input for every token.
  var delimiter = /,|\r?\n/g;
  delimiter.lastIndex = start;
  var match = delimiter.exec(data);
  var end = match === null ? data.length : match.index;

  // The cursor stops ON the delimiter; the next call turns it into a token.
  this.currentIndex = end;
  return new ValueToken(data.substring(start, end));
};
/**
 * Parses CSV text into an array of rows, each row being an array of field
 * values, by consuming tokens from a Tokenizer built over `data`.
 *
 * Rules applied to the token stream:
 *  - Every ValueToken appends its value to the current row.
 *  - A comma that is not preceded by a value (start of row, or right after
 *    another comma) contributes an empty field '', and a comma directly
 *    followed by a line break or end of input contributes a trailing ''.
 *    This keeps columns aligned for inputs such as 'a,,b' or 'a,'.
 *  - A NewLineToken closes the current row and starts a new one.
 *  - An EOFToken closes the current row and ends parsing, so the result
 *    always contains at least one row; a row with no tokens is [].
 *
 * @param {string} data - CSV text, passed to the Tokenizer constructor.
 * @returns {Array<Array>} The parsed rows.
 * @throws {string} 'Unsupported token' if the tokenizer returns something
 *     that is none of the four known token types.
 */
function parse(data) {
  var tokenizer = new Tokenizer(data);
  var lines = [];
  var line = [];
  // What the previous token in the current row was: 'start' (none yet),
  // 'value' or 'comma'. Needed to detect empty fields.
  var previous = 'start';
  // Declared locally; assigning an undeclared name would create a global
  // and fail outright in strict mode / ES modules.
  var token;

  while (true) {
    token = tokenizer.nextToken();

    if (token instanceof ValueToken) {
      line.push(token.value);
      previous = 'value';
      continue;
    }

    if (token instanceof CommaToken) {
      // No value before this comma means the field it terminates is empty.
      if (previous !== 'value') {
        line.push('');
      }
      previous = 'comma';
      continue;
    }

    if (token instanceof NewLineToken || token instanceof EOFToken) {
      // A row ending right after a comma has an empty last field.
      if (previous === 'comma') {
        line.push('');
      }

      lines.push(line);

      if (token instanceof EOFToken) {
        return lines;
      }

      line = [];
      previous = 'start';
      continue;
    }

    throw 'Unsupported token';
  }
}
// Demo: a header row followed by two records. The last record has a quoted
// field ("The\nWall") that contains a line break inside the quotes.
var input = 'artist,album,year\n"Dream Theater",Octavarium,2005\n"Pink Floyd","The\nWall",1979';

// Print the parsed rows.
console.log(parse(input));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment