Skip to content

Instantly share code, notes, and snippets.

@park-brian
Last active June 2, 2021 15:24
Show Gist options
  • Save park-brian/77ee022ad355db683aa8ebc83dd0c90e to your computer and use it in GitHub Desktop.
Save park-brian/77ee022ad355db683aa8ebc83dd0c90e to your computer and use it in GitHub Desktop.
CSV utilities
/**
 * Parses RFC 4180 style CSV text.
 *
 * A field may be wrapped in the escape character; inside a wrapped field,
 * delimiters and line breaks are taken literally and a doubled escape
 * character stands for a single one. Outside wrapped fields a record ends at
 * "\n" or "\r\n".
 *
 * Configuration properties (all optional):
 * - delimiter: single-character field delimiter (default: ",")
 * - escape: single-character quote/escape character (default: '"')
 * - skipLines: number of leading records to discard (default: 0)
 * - headers: true to read the headers from the first non-skipped record, an
 *   array to supply them directly (every record is then data), or false to
 *   get each record as an array of values (default: true)
 * - nullValue: extra raw value the default transformValue maps to null
 * - transformHeaders: maps the raw header fields to the final headers
 *   (default: trims every header)
 * - transformRecord: maps each finished record to a custom format
 *   (default: identity)
 * - transformValue: maps an individual raw value; called with (value, header)
 *   when headers are in use, otherwise as an Array.prototype.map callback
 *   (value, index, fields). Default: empty, missing, whitespace-only and
 *   nullValue fields become null, numeric text becomes a Number, anything
 *   else becomes a trimmed string.
 *
 * @param {string} text - CSV source text.
 * @param {object} [config] - Options described above.
 * @returns {{headers: Array, data: Array}|Array} `{ headers, data }` when
 *   headers are enabled, otherwise the array of records.
 */
function fromCsv(text, config) {
  // Read nullValue defensively: config may be omitted entirely.
  const nullValue = config == null ? undefined : config.nullValue;

  const defaultTransformValue = (value) => {
    if (value === '' || value === undefined || value === nullValue) {
      return null;
    }
    const trimmed = String(value).trim();
    // Number('   ') is 0, so blank fields must be caught before the numeric test.
    if (trimmed === '') {
      return null;
    }
    const number = Number(trimmed);
    return Number.isNaN(number) ? trimmed : number;
  };

  const options = {
    delimiter: ',',
    escape: '"',
    skipLines: 0,
    headers: true,
    transformHeaders: (rawHeaders) => rawHeaders.map((header) => header.trim()),
    transformRecord: (record) => record,
    transformValue: defaultTransformValue,
    ...config,
  };
  const {
    delimiter,
    escape,
    skipLines,
    transformHeaders,
    transformRecord,
    transformValue,
  } = options;
  let { headers } = options;
  // True once `headers` holds real header names rather than the `true` flag.
  let headersResolved = Array.isArray(headers);

  const records = [];
  let buffer = '';
  let fields = [];
  let escaped = false;
  // Tracks whether the current field has consumed any character (including
  // quotes), so that an empty quoted field at end of input is not lost.
  let fieldStarted = false;
  let numLines = 0;

  const appendField = () => {
    fields.push(buffer);
    buffer = '';
    fieldStarted = false;
  };

  const appendRecord = () => {
    const lineIndex = numLines;
    const rawFields = fields;
    fields = [];
    numLines++;

    if (lineIndex < skipLines) {
      return;
    }
    if (!headers) {
      records.push(transformRecord(rawFields.map(transformValue)));
      return;
    }
    // With `headers: true`, the first non-skipped record is the header row.
    if (!Array.isArray(headers) && lineIndex === skipLines) {
      headers = transformHeaders(rawFields);
      headersResolved = true;
      return;
    }
    const record = headers.reduce(
      (accumulator, header, index) => ({
        ...accumulator,
        [header]: transformValue(rawFields[index], header),
      }),
      {},
    );
    records.push(transformRecord(record));
  };

  for (let i = 0; i < text.length; i++) {
    const char = text[i];

    // Treat CRLF as one line break: drop the CR and let the LF end the record.
    if (!escaped && char === '\r' && text[i + 1] === '\n') {
      continue;
    }

    if (!escaped && (char === delimiter || char === '\n')) {
      appendField();
      if (char === '\n') {
        appendRecord();
      }
    } else if (char !== escape) {
      buffer += char;
      fieldStarted = true;
    } else if (escaped && text[i + 1] === escape) {
      // A doubled escape character inside a quoted field is a literal one.
      buffer += escape;
      i++;
    } else {
      // Any other escape character opens or closes a quoted section.
      escaped = !escaped;
      fieldStarted = true;
    }
  }

  // Flush a final record that was not terminated by a newline. Checking
  // `fields.length` as well keeps a trailing empty field (e.g. "1,").
  if (fieldStarted || fields.length > 0) {
    appendField();
    appendRecord();
  }

  if (headers) {
    // If no header row was ever read, report an empty header list instead of
    // leaking the `true` flag to the caller.
    return { headers: headersResolved ? headers : [], data: records };
  }
  return records;
}
/**
 * Generates RFC 4180 style CSV text from an array of rows.
 *
 * Each row is either an array of values or an object. For object rows a
 * header line is emitted before the first row, and every object row is
 * written in header order so columns stay aligned with that line.
 *
 * Values are written as follows: numbers as-is, null/undefined as an empty
 * field, everything else as a double-quoted string with embedded double
 * quotes doubled.
 *
 * Configuration properties (all optional):
 * - delimiter: field delimiter (default: ",")
 * - newline: record separator (default: "\r\n")
 * - headers: array of property names to export for object rows
 *   (default: the keys of the first object row)
 *
 * @param {Iterable<Array|object>} data - Rows to serialize.
 * @param {object} [config] - Options described above.
 * @returns {string} The CSV text, without a trailing newline.
 */
function toCsv(data, config) {
  const { delimiter, newline, headers: configuredHeaders } = {
    delimiter: ',',
    newline: '\r\n',
    ...config,
  };

  const formatValue = (value) => {
    if (value === null || value === undefined) {
      return '';
    }
    if (typeof value === 'number') {
      return value;
    }
    return `"${String(value).replace(/"/g, '""')}"`;
  };
  const formatRow = (values) => values.map(formatValue).join(delimiter);

  const rows = [];
  // Resolved once so every object row uses the same column order.
  let headers = configuredHeaders;

  for (const row of data) {
    if (Array.isArray(row)) {
      rows.push(formatRow(row));
      continue;
    }
    if (!headers) {
      headers = Object.keys(row);
    }
    // The header line is only written when nothing has been emitted yet.
    if (rows.length === 0) {
      rows.push(formatRow(headers));
    }
    rows.push(formatRow(headers.map((header) => row[header])));
  }

  return rows.join(newline);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment