Created
November 27, 2012 15:31
-
-
Save lambdalisue/4154837 to your computer and use it in GitHub Desktop.
node.js CSV parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fs = require 'fs' | |
# Strip a `#` comment from a single line of text.
#
# raw - one line of CSV text (String), without a line terminator.
#
# Returns the line with everything from the first `#` to the end removed.
# Text in front of the `#` (including whitespace) is returned untouched, and
# a line without `#` is returned as-is.
removeComment = (raw) ->
  commentPattern = /#.*$/
  raw.replace commentPattern, ''
# Remove leading and trailing whitespace from a string.
#
# str - the String to trim.
#
# Returns the trimmed String; whitespace inside the string is kept.
trimSpace = (str) ->
  str.trim()
# Parse CSV text into an array of rows, each row an array of trimmed columns.
#
# data      - CSV text (String). Lines may end with "\n", "\r\n" or "\r".
# delimiter - column separator passed to String::split (default: ',').
#
# A `#` starts a comment that runs to the end of its line. Lines that are
# empty or whitespace-only once the comment is removed are skipped. Empty
# columns are kept as ''. Quoted fields are not supported: a delimiter or `#`
# inside quotes is still treated as a delimiter / comment start.
#
# Returns an Array of Arrays of Strings.
parse = (data, delimiter=',') ->
  rows = []
  # Split on every common line ending. Splitting on '\n' alone leaves a
  # trailing "\r" on each line of CRLF input; that "\r" stops the comment
  # pattern from matching and makes blank lines look non-empty.
  for line in data.split(/\r\n|\r|\n/)
    line = removeComment(line)
    # Skip blank lines, including indented comment-only lines, instead of
    # emitting a spurious [''] row for them.
    continue unless trimSpace(line)
    rows.push (trimSpace(column) for column in line.split(delimiter))
  rows
# Read a CSV file asynchronously and pass the parsed rows to a callback.
#
# filename  - path of the file to read.
# encoding  - text encoding of the file (default: 'utf-8'). May be omitted,
#             in which case the remaining arguments shift left:
#             parseFile(filename, callback[, delimiter]).
# callback  - function called with the parsed rows (Array of Arrays of
#             Strings). It receives no error argument.
# delimiter - column separator (default: ',').
#
# A read error is thrown from inside the fs.readFile callback, so it cannot
# be caught by a try/catch around the parseFile call.
parseFile = (filename, encoding, callback, delimiter=',') ->
  # Detect the omitted-encoding form by type rather than by argument count,
  # so that (filename, callback, delimiter) is handled too.
  if typeof encoding is 'function'
    [encoding, callback, delimiter] = ['utf-8', encoding, callback ? delimiter]
  # Without an encoding fs.readFile yields a Buffer, which parse cannot split.
  encoding ?= 'utf-8'
  fs.readFile filename, encoding, (err, data) ->
    throw err if err
    callback parse(data, delimiter)
# Read a CSV file synchronously and return the parsed rows.
#
# filename  - path of the file to read.
# encoding  - text encoding of the file (default: 'utf-8').
# delimiter - column separator (default: ',').
#
# When called with exactly two arguments the second one is taken as the
# delimiter, i.e. parseFileSync(filename, delimiter), and the encoding is
# 'utf-8'.
#
# Returns an Array of Arrays of Strings. Errors from fs.readFileSync
# propagate to the caller.
parseFileSync = (filename, encoding='utf-8', delimiter=',') ->
  # Only the two-argument form needs shifting; with a single argument both
  # defaults are already correct.
  if arguments.length is 2
    [encoding, delimiter] = ['utf-8', encoding]
  # Forward the delimiter so a custom separator is actually honoured.
  parse fs.readFileSync(filename, encoding), delimiter
# Public API: expose the three parsing entry points under their own names.
for own name, fn of {parse, parseFile, parseFileSync}
  exports[name] = fn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'better-require' | |
csv = require 'csv.coffee' | |
expect = require 'expect.js' | |
# Specs for csv.parse, written as a table: each case gives the spec title,
# the arguments handed to csv.parse and the rows it must return.
describe "csv.parse(data, delimiter=',')", ->
  cases = [
    {
      title: "should parse single line CSV ('A, B, C, D, E')"
      args: ['A, B, C, D, E']
      rows: [
        ['A', 'B', 'C', 'D', 'E']
      ]
    }
    {
      title: "should parse multi line CSV ('A, B, C, D, E\\nF, G, H, I, J')"
      args: ['A, B, C, D, E\nF, G, H, I, J']
      rows: [
        ['A', 'B', 'C', 'D', 'E']
        ['F', 'G', 'H', 'I', 'J']
      ]
    }
    {
      title: "should remove empty raw(line) in CSV ('A, B, C, D, E\\n\\nK, L, M, N, O')"
      args: ['A, B, C, D, E\n\nK, L, M, N, O']
      rows: [
        ['A', 'B', 'C', 'D', 'E']
        ['K', 'L', 'M', 'N', 'O']
      ]
    }
    {
      title: "should remove comment in CSV ('A, B, C, D, E\\n#F, G, H, I, J\\nK, L, M, N, O')"
      args: ['A, B, C, D, E\n#F, G, H, I, J\nK, L, M, N, O']
      rows: [
        ['A', 'B', 'C', 'D', 'E']
        ['K', 'L', 'M', 'N', 'O']
      ]
    }
    {
      title: "should keep empty column(item) in CSV ('A, B, , D, E,')"
      args: ['A, B, , D, E,']
      rows: [
        ['A', 'B', '', 'D', 'E', '']
      ]
    }
    {
      title: "should parse with different delimiter ('A, B; C, D; E, F')"
      args: ['A, B; C, D; E, F', ';']
      rows: [
        ['A, B', 'C, D', 'E, F']
      ]
    }
  ]
  # Register one spec per table entry.
  cases.forEach ({title, args, rows}) ->
    it title, ->
      expect(csv.parse(args...)).eql(rows)
# Specs for csv.parseFile covering the 4-, 3- and 2-argument call forms.
# The suite title states the real default delimiter (',').
describe "csv.parseFile(filename, encoding='utf-8', callback, delimiter=',')", ->
  # Rows every spec below expects to get back for test.csv.
  expected = [
    ['A', 'B', 'C', 'D', 'E']
    ['F', 'G', 'H', 'I', 'J']
    ['K', 'L', 'M', 'N', 'O']
  ]
  it "should parse CSV file (test.csv) with full arguments", (done) ->
    csv.parseFile 'test.csv', 'utf-8', (raws) ->
      expect(raws).eql(expected)
      done()
    , ','
  it "should parse CSV file (test.csv) with 3 arguments", (done) ->
    csv.parseFile 'test.csv', 'utf-8', (raws) ->
      expect(raws).eql(expected)
      done()
  it "should parse CSV file (test.csv) with 2 arguments", (done) ->
    csv.parseFile 'test.csv', (raws) ->
      expect(raws).eql(expected)
      done()
# Specs for csv.parseFileSync covering the 3-, 2- and 1-argument call forms.
# The suite title states the real default delimiter (',').
describe "csv.parseFileSync(filename, encoding='utf-8', delimiter=',')", ->
  # Rows every spec below expects to get back for test.csv.
  expected = [
    ['A', 'B', 'C', 'D', 'E']
    ['F', 'G', 'H', 'I', 'J']
    ['K', 'L', 'M', 'N', 'O']
  ]
  it "should parse CSV file (test.csv) with full arguments", ->
    raws = csv.parseFileSync 'test.csv', 'utf-8', ','
    expect(raws).eql(expected)
  it "should parse CSV file (test.csv) with 2 arguments", ->
    # In the two-argument form the second argument is the delimiter.
    raws = csv.parseFileSync 'test.csv', ','
    expect(raws).eql(expected)
  it "should parse CSV file (test.csv) with 1 argument", ->
    raws = csv.parseFileSync 'test.csv'
    expect(raws).eql(expected)
We can make this file beautiful and searchable if this error is corrected: It looks like row 2 should actually have 1 column, instead of 5. in line 1.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a comment line | |
A, B, C, D, E | |
F, G, H, I, J # Trailing comments | |
K, L, M, N, O |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment