Last active
October 7, 2023 09:29
-
-
Save westc/15caf03f6c945564a126 to your computer and use it in GitHub Desktop.
Simple CSV Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Takes a string representation of a CSV and parses it into an array of arrays
 * or an array of objects (dictionaries).
 *
 * Cells may be wrapped in double quotes, in which case they may contain the
 * delimiter, line breaks and doubled double quotes ("") standing for a single
 * literal double quote. Rows are separated by "\r\n", "\n" or "\r". A single
 * line break at the very end of the input terminates the last row and does
 * not start a new, empty one.
 *
 * Text that does not fit this grammar (for example a stray double quote in the
 * middle of an unquoted cell) is skipped silently rather than reported.
 *
 * @param {string} strCSV
 *   String representation of a CSV.
 * @param {boolean=} opt_headerRow
 *   Optional boolean defaulting to false which if set to true indicates that
 *   the first row is to be collected as the header row and all rows returned
 *   will be objects keyed by those header names instead of arrays.
 * @param {string=} opt_delimiter
 *   Optional string defaulting to "," which indicates the cell delimiter. If
 *   specified this must be only one character and may not be a double quote,
 *   a carriage return or a linefeed.
 * @param {((value: string?, rowIndex: number, cellIndex: number, colHeader: string?) => *)=} opt_fnProcessCell
 *   Optional function used to pre-process the cell values. The values passed
 *   are the cell value found, the row index, the cell index, and the cell
 *   name (if opt_headerRow is true). The row index counts every row of the
 *   input, so when opt_headerRow is true the first data row has index 1. The
 *   value returned will be stored in the array of rows returned.
 * @return {{[k: string]: *}[]|*[][]}
 *   In the case that opt_headerRow is truish, an array of objects keyed by the
 *   column names will be returned with each sub-object representing a CSV
 *   row. In the case that opt_headerRow is not truish an array of arrays will
 *   be returned with each sub-array representing a CSV row.
 * @throws {Error}
 *   If the delimiter is not exactly one character or is a double quote, a
 *   carriage return or a linefeed.
 */
function parseCSV(strCSV, opt_headerRow, opt_delimiter, opt_fnProcessCell) {
  opt_delimiter = opt_delimiter || ',';
  if (!/^[^\r\n"]$/.test(opt_delimiter)) {
    throw new Error('The delimiter must be exactly 1 character and cannot be any of the following:\n- \\r\n- \\n\n- "');
  }

  // Express the delimiter as \uXXXX so that regex metacharacters (".", "|",
  // "\", "]", ...) are always treated literally inside the pattern.
  var delimiterPat = '\\u' + ('000' + opt_delimiter.charCodeAt(0).toString(16)).slice(-4);

  // Group 1: the raw cell (unquoted text, or the quoted text with its quotes).
  // Group 2: the inside of a quoted cell (undefined for unquoted cells).
  // Group 3: what ended the cell (the delimiter or a line break).
  // The quoted part is written as [^"]*(?:""[^"]*)* so there is only one way
  // to match any given text; this avoids exponential backtracking on
  // malformed input such as an unterminated quote.
  var cellPattern = new RegExp(
    '([^"' + delimiterPat + '\r\n]*|"([^"]*(?:""[^"]*)*)")(' + delimiterPat + '|\r?\n|\r)',
    'g'
  );

  // Drop one terminal line break so it does not produce a spurious empty row,
  // then append a delimiter so the last cell is terminated like all others.
  var text = String(strCSV).replace(/(?:\r?\n|\r)$/, '') + opt_delimiter;

  var colNames = [];
  var inHeaderRow = !!opt_headerRow;
  var rowCount = 0;
  var colIndex = 0;
  var row = [];
  // With a header row the first row only feeds colNames and is not returned.
  var rows = opt_headerRow ? [] : [row];

  var match;
  while ((match = cellPattern.exec(text)) !== null) {
    // Compare against undefined (not truthiness) so that an empty quoted
    // cell ("") yields an empty string instead of the literal quotes.
    var cell = match[2] !== undefined ? match[2].replace(/""/g, '"') : match[1];

    if (inHeaderRow) {
      colNames.push(cell);
    }
    else {
      row[opt_headerRow ? colNames[colIndex] : colIndex] = opt_fnProcessCell
        ? opt_fnProcessCell(cell, rowCount, colIndex, colNames[colIndex])
        : cell;
      colIndex++;
    }

    // Anything other than the delimiter here is a line break: start a new row.
    if (match[3] !== opt_delimiter) {
      rowCount++;
      row = opt_headerRow ? {} : [];
      rows.push(row);
      colIndex = 0;
      inHeaderRow = false;
    }
  }

  return rows;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Parse fails with: