Skip to content

Instantly share code, notes, and snippets.

@atomkirk
Last active February 13, 2023 09:19
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save atomkirk/eccb66f77b306d0d1fcecb2c605bd22e to your computer and use it in GitHub Desktop.
Save atomkirk/eccb66f77b306d0d1fcecb2c605bd22e to your computer and use it in GitHub Desktop.
Parse CSV with JavaScript
import parseCsv from 'zipbooks/utils/parse-csv'
import { module, test } from 'qunit'
// Unit tests for the parse-csv utility: one case for the plain
// array-of-rows output and one for the `headers: true` object output.
module('Unit | Utility | parse-csv', () => {
  // Shared fixture: a header line, two plain rows, and one row whose first
  // field is quoted because it contains a comma.
  const csv = 'name,age\nadam,31\ntim,32\n"St, clair",26';

  test('parses csv successfully', (assert) => {
    const serialized = JSON.stringify(parseCsv(csv));
    assert.equal(serialized, '[["name","age"],["adam","31"],["tim","32"],["St, clair","26"]]');
  });

  test('parses with header', (assert) => {
    const serialized = JSON.stringify(parseCsv(csv, {headers: true}));
    assert.equal(serialized, '[{"name":"adam","age":"31"},{"name":"tim","age":"32"},{"name":"St, clair","age":"26"}]');
  });
});
// extracted from: https://stackoverflow.com/a/14991797/798055
/**
 * Parses a CSV string into rows of string fields.
 *
 * Fields are separated by commas and rows by LF, CR, or CRLF. A field may be
 * wrapped in double quotes, in which case commas and line breaks inside it are
 * kept literally and a doubled quote ("") yields one literal quote character.
 * A lone quote character always toggles quoted mode and is never emitted.
 *
 * @param {string} str - The CSV text to parse.
 * @param {Object} [opts] - Parsing options.
 * @param {boolean} [opts.headers] - When truthy, the first row is used as the
 *   list of keys and every following row is returned as an object keyed by
 *   those names (values are matched to keys by column index).
 * @returns {Array<Array<string>>|Array<Object>} An array of rows (each an array
 *   of strings), or an array of objects when `opts.headers` is truthy.
 */
export default function parseCsv(str, opts = {}) {
  const rows = [];
  let inQuotes = false; // true while scanning inside a quoted field
  let row = 0;
  let col = 0;

  for (let i = 0; i < str.length; i++) {
    const ch = str[i];
    const next = str[i + 1];

    // Rows and fields are created lazily, only once a character belonging to
    // them is seen, so a trailing line break does not produce an empty row.
    rows[row] = rows[row] || [];
    rows[row][col] = rows[row][col] || '';

    // Escaped quote ("") inside a quoted field: emit one quote, skip the pair.
    if (ch === '"' && inQuotes && next === '"') {
      rows[row][col] += ch;
      i++;
      continue;
    }

    // A single quote opens or closes a quoted field.
    if (ch === '"') {
      inQuotes = !inQuotes;
      continue;
    }

    if (!inQuotes) {
      if (ch === ',') {
        col++;
        // Create the next field right away: if the input ends directly after
        // this delimiter, the trailing empty field must still be present.
        rows[row][col] = '';
        continue;
      }

      if (ch === '\r' || ch === '\n') {
        // Treat CRLF as a single line break by consuming the LF as well.
        if (ch === '\r' && next === '\n') {
          i++;
        }
        row++;
        col = 0;
        continue;
      }
    }

    // Any other character is part of the current field.
    rows[row][col] += ch;
  }

  if (!(opts && opts.headers)) {
    return rows;
  }

  // Header mode: the first row supplies the keys for every following row.
  const header = rows[0];
  return rows.slice(1).map((fields) => {
    return fields.reduce((record, value, index) => {
      record[header[index]] = value;
      return record;
    }, {});
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment