Last active
October 22, 2023 15:20
-
-
Save loganmzz/0bc8644460995595dabc57c141f30b22 to your computer and use it in GitHub Desktop.
CSV Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load the whole CSV document from standard input up front; the parser
// below works on an in-memory string rather than on a stream.
const fs = require("fs");
const stdin = fs.readFileSync("/dev/stdin", { encoding: "utf-8" });
/**
 * Character-by-character CSV reader over an in-memory string.
 *
 * The reader keeps a cursor (`i`), the character under the cursor
 * (`current`, `undefined` once the input is exhausted) and the zero-based
 * line/column of that character (`location`). Values are separated by `,`,
 * rows by `\n`. A value that starts with `"` is a quoted value: it may
 * contain commas and newlines, and a doubled quote (`""`) stands for one
 * literal quote.
 *
 * Diagnostic traces are written to stderr so they never mix with data a
 * caller prints on stdout.
 */
class Reader {
  /** Zero-based line (`l`) and column (`c`) of the character in `current`. */
  location = { l: 0, c: 0 };

  /** Index into `stdin` of the character in `current`. */
  i = 0;

  /**
   * @param {string} stdin - Complete CSV text to parse.
   */
  constructor(stdin) {
    this.stdin = stdin;
    this.current = this.stdin.length > 0 ? this.stdin[0] : undefined;
  }

  /**
   * Emits a diagnostic trace line on stderr.
   * @param {string} message - Text to emit.
   */
  #trace(message) {
    console.error(message);
  }

  /**
   * Advances the cursor by one character.
   *
   * The line counter is bumped when the cursor moves *past* a newline, so
   * the newline itself is reported at the end of its own line and the first
   * character of the next line is at column 0.
   *
   * @returns {string|undefined} The new current character, or `undefined`
   *   at end of input.
   */
  next() {
    if (this.i >= this.stdin.length) {
      this.current = undefined;
      return this.current;
    }
    const leaving = this.stdin[this.i];
    this.i++;
    if (leaving === '\n') {
      this.location.l++;
      this.location.c = 0;
    } else {
      this.location.c++;
    }
    // Indexing one past the end yields undefined, which marks end of input.
    this.current = this.stdin[this.i];
    return this.current;
  }

  /**
   * @param {number} start - Inclusive start index into the input.
   * @param {number} end - Exclusive end index into the input.
   * @returns {string} The requested slice of the input.
   */
  substring(start, end) {
    return this.stdin.substring(start, end);
  }

  /**
   * Consumes characters until the current character is one of `chars` or
   * the input ends. The stopping character is not consumed.
   *
   * @param {string|string[]} chars - Stop characters; anything with an
   *   `includes` method that accepts a single character.
   * @returns {string} The text consumed by this call.
   */
  readUntil(chars) {
    this.#trace(`Reader.readUntil: start (${this.location.l},${this.location.c})`);
    // Slice from the cursor position, not from the beginning of the input.
    const start = this.i;
    while (this.current !== undefined && !chars.includes(this.current)) {
      this.next();
    }
    return this.substring(start, this.i);
  }

  /**
   * Consumes a run of space characters (`' '` only) at the cursor.
   *
   * @returns {string} The spaces consumed, possibly empty.
   */
  readSpaces() {
    this.#trace(`Reader.readSpaces: start (${this.location.l},${this.location.c})`);
    const start = this.i;
    while (this.current === ' ') {
      this.next();
    }
    return this.substring(start, this.i);
  }

  /**
   * Reads one value at the cursor, quoted or unquoted, and leaves the
   * cursor on the character that follows it.
   *
   * @returns {string} The decoded value (quotes removed, `""` unescaped).
   * @throws {Error} If a quoted value is still open at end of input.
   */
  readValue() {
    this.#trace(`Reader.readValue: start (${this.location.l},${this.location.c})`);
    let value;
    if (this.current === '"') {
      const opening = { ...this.location };
      this.next(); // Passed opening '"'
      value = '';
      while (true) {
        value += this.readUntil('"');
        if (this.current === undefined) {
          throw new Error(
            `Unexpected EOF: quoted value starting at (${opening.l},${opening.c}) is not closed`,
          );
        }
        this.next(); // Passed the '"' that stopped readUntil
        if (this.current !== '"') {
          break; // It was the closing quote.
        }
        // A doubled quote encodes one literal quote inside the value.
        value += '"';
        this.next(); // Passed second '"'
      }
    } else {
      value = this.readUntil([',', '\n']);
    }
    this.#trace(`Reader.readValue: end (${this.location.l},${this.location.c}: ${value})`);
    return value;
  }

  /**
   * Reads values up to and including the next row terminator. End of input
   * also terminates the row, so a final line without a trailing newline is
   * accepted.
   *
   * @returns {string[]} The values of the row.
   * @throws {Error} If a value is followed by anything other than `,`,
   *   a newline or end of input.
   */
  readRow() {
    this.#trace(`Reader.readRow: start (${this.location.l},${this.location.c})`);
    const row = [];
    while (true) {
      row.push(this.readValue());
      switch (this.current) {
        case ',':
          // Go next value
          this.next();
          break;
        case '\n':
          this.next();
          return row;
        case undefined:
          // End of input closes the last row.
          return row;
        default:
          throw new Error(
            `Invalid character (${this.current}) at (${this.location.l},${this.location.c}): expected value separator (',') or end of line ('\\n')`,
          );
      }
    }
  }

  /**
   * Reads every remaining row of the input.
   *
   * @returns {string[][]} One array of values per row; empty when no input
   *   remains.
   * @throws {Error} Propagated from `readRow` / `readValue` on malformed
   *   input.
   */
  readAll() {
    this.#trace(`Reader.readAll: start (${this.location.l},${this.location.c})`);
    const rows = [];
    while (this.current !== undefined) {
      rows.push(this.readRow());
    }
    return rows;
  }
}
// function readString(stdin, start) { | |
// let string = ""; | |
// if (stdin[start] != '"') throw `Invalid character start: ${stdin[start]} (position: ${start})`; | |
// let finished = false; | |
// let next = start+1; | |
// while (!finished) { | |
// const indexOf = stdin.indexOf('"', next); | |
// if (indexOf < 0) { | |
// throw `Unexpected EOF: Data starting at ${start} is incomplete`; | |
// } | |
// string += stdin.substring(next, indexOf); | |
// if ((indexOf+1) < stdin.length && stdin[indexOf+1] == '"') { | |
// next = indexOf+2; | |
// } else { | |
// next = indexOf+1; | |
// finished = true; | |
// } | |
// } | |
// return {string,next}; | |
// } | |
// function readRow(stdin, start) { | |
// let finished = false; | |
// let row = []; | |
// let next = start; | |
// while (!finished) { | |
// let read = readString(stdin, next); | |
// row.push(read.string); | |
// if (read.next >= stdin.length) { | |
// next = read.next; | |
// finished = true; | |
// } else { | |
// switch (stdin[read.next]) { | |
// case ',': | |
// next = read.next + 1; | |
// break; | |
// case '\n': | |
// next = read.next + 1; | |
// finished = true; | |
// break; | |
// default: | |
// throw `Unexpected character between cell values: ${stdin[read.next]} (position: ${read.next})`; | |
// } | |
// } | |
// } | |
// return {row,next}; | |
// } | |
// function readData(stdin) { | |
// let finished = false; | |
// let next = 0; | |
// let data = []; | |
// while (!finished) { | |
// let spaces = readSpaces(stdin, next); | |
// if (spaces.next >= stdin.length) { | |
// finished = true; | |
// } else { | |
// let read = readRow(stdin, spaces.next); | |
// data.push(read.row); | |
// finished = read.next >= stdin.length; | |
// next = read.next; | |
// } | |
// } | |
// return {data,next}; | |
// } | |
// Parse everything that was read from stdin and print the resulting rows
// as JSON indented by two spaces.
const parsedRows = new Reader(stdin).readAll();
const prettyJson = JSON.stringify(parsedRows, undefined, 2);
console.log(prettyJson);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment