Skip to content

Instantly share code, notes, and snippets.

@carbonrobot
Created March 12, 2018 13:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save carbonrobot/50c277b62e78ca63815e01cb9783c800 to your computer and use it in GitHub Desktop.
Save carbonrobot/50c277b62e78ca63815e01cb9783c800 to your computer and use it in GitHub Desktop.
CSV Parser with Schema Validation
const csv = require('fast-csv');
const HeaderValidationError = require('./headerValidationError');
/**
* TODO: missing features
* Check for missing conditional data
* Check for missing exclusive conditional data
* Check for invalid characters in headers
* Test for non-joi validation
* Joi helper should export custom joi instance
* schema/folder, tests for schema
* FileProcessingError, determine if needed
* Document things
* Logging
*/
/**
 * Parses a CSV stream into records according to an attribute map.
 *
 * The attribute map is a plain object keyed by output attribute name. Each
 * value is either a header name (string) or an options object; both forms are
 * interpreted by assertValidHeaders and parseRecord.
 */
class CSVStreamParser {
  /**
   * @param {Object} attrMap - Schema keyed by output attribute name.
   */
  constructor(attrMap) {
    // Kept as [attrName, options] pairs because the helpers iterate and
    // destructure entries rather than look attributes up by key.
    this.attrMap = Object.entries(attrMap);
  }

  /**
   * Pipes `input` through the CSV parser. The first row is treated as the
   * header row and validated; every following row becomes one result.
   *
   * @param {Object} input - Readable stream of CSV text (must support `pipe`).
   * @returns {Promise<Array<Object>>} Resolves with one `{ errors, record,
   *   lineNumber }` entry per data row. `lineNumber` starts at 1 for the first
   *   data row (the header row is not counted). Rejects with the first error
   *   raised by header validation, record parsing, the CSV parser, or the
   *   input stream.
   */
  parse(input) {
    const records = [];
    return new Promise((resolve, reject) => {
      let headers;
      let lineNumber = 0;
      let failed = false;

      const rejectOnce = error => {
        failed = true;
        reject(error);
      };

      const parser = csv()
        .on('data', data => {
          // Once the parse has failed there is no point in processing rows.
          if (failed) {
            return;
          }
          // Reject explicitly instead of letting the exception escape the
          // event listener, where it is up to the emitting stream whether it
          // surfaces as an 'error' event or as an uncaught exception.
          try {
            if (!headers) {
              headers = normalizeHeaders(data);
              assertValidHeaders(this.attrMap, headers);
            } else {
              const record = parseRecord(this.attrMap, data, headers);
              record.lineNumber = ++lineNumber;
              records.push(record);
            }
          } catch (error) {
            rejectOnce(error);
          }
        })
        .on('error', rejectOnce)
        .on('end', () => resolve(records));

      // pipe() does not forward source errors to the destination, so without
      // this listener a failing input would leave the promise pending forever.
      if (typeof input.on === 'function') {
        input.on('error', rejectOnce);
      }
      input.pipe(parser);
    });
  }
}
/**
 * Verifies that a header row can be used with the given attribute map.
 *
 * Only attributes described by an options object with a truthy `required`
 * are checked. `required` may be a function, in which case it is called with
 * the header list and its result decides whether the header is required.
 *
 * Note: `options.name` is compared verbatim against `headers`. In this file
 * the headers passed in come from normalizeHeaders (lowercased, spaces and
 * slashes removed), so schema names must already be in that form to match.
 *
 * @param {Array<Array>} attrMap - `[attrName, options]` pairs.
 * @param {Array<string>} headers - Header names of the CSV file.
 * @throws {HeaderValidationError} If a header is empty or a required header
 *   is absent.
 */
function assertValidHeaders(attrMap, headers) {
  // An empty string means a column had no usable header text.
  const hasBlankHeader = headers.includes('');
  if (hasBlankHeader) {
    throw new HeaderValidationError('Null headers are not allowed');
  }

  // Collect the names of required headers that are not present.
  const missingNames = [];
  for (const [, options] of attrMap) {
    if (typeof options !== 'object' || !options.required) {
      continue;
    }
    const isRequired =
      typeof options.required === 'function' ? options.required(headers) : options.required;
    if (isRequired && headers.indexOf(options.name) < 0) {
      missingNames.push(options.name);
    }
  }

  if (missingNames.length > 0) {
    const missingHeaders = missingNames.map(name => name.toLowerCase()).join();
    throw new HeaderValidationError(`Missing required headers: ${missingHeaders}`);
  }
}
/**
 * Looks up the attribute-map entry that is bound to a header name.
 *
 * An entry matches when its value is the header name itself (string form) or
 * an options object whose `name` equals the header name.
 *
 * @param {Array<Array>} attrMap - `[attrName, options]` pairs.
 * @param {string} headerKey - Header name to look up.
 * @returns {Array|undefined} The matching `[attrName, options]` pair, or
 *   undefined when no attribute is mapped to that header.
 */
function getAttrByKeyName(attrMap, headerKey) {
  for (const entry of attrMap) {
    const options = entry[1];
    const mappedHeader = typeof options === 'object' ? options.name : options;
    if (mappedHeader === headerKey) {
      return entry;
    }
  }
  return undefined;
}
/**
 * Produces the canonical form of each header: surrounding whitespace is
 * trimmed, every space and forward slash is removed, and the result is
 * lowercased.
 *
 * @param {Array<string>} data - Raw header cells from the first CSV row.
 * @returns {Array<string>} Normalized header names, in the same order.
 */
function normalizeHeaders(data) {
  const stripSeparators = text => text.replace(/[ \/]/g, '');
  return data.map(rawName => {
    const trimmed = rawName.trim();
    return stripSeparators(trimmed).toLowerCase();
  });
}
/**
 * Converts one CSV row into a record keyed by attribute name.
 *
 * Columns whose header is not mapped by `attrMap` are ignored. A value that
 * fails validation is left out of the record and reported in `errors`.
 *
 * @param {Array<Array>} attrMap - `[attrName, options]` pairs.
 * @param {Array<string>} data - Cell values of the row.
 * @param {Array<string>} headers - Header names, positionally aligned with `data`.
 * @returns {{errors: (Array|undefined), record: Object}} `errors` is undefined
 *   when nothing failed; otherwise an array of validation errors that also
 *   carries the raw row as `errors.originalData`.
 */
function parseRecord(attrMap, data, headers) {
  const record = {};
  let errors;

  for (const [column, headerKey] of headers.entries()) {
    const value = data[column];

    // Columns without a mapped attribute are silently skipped.
    const attr = getAttrByKeyName(attrMap, headerKey);
    if (!attr) {
      continue;
    }

    const [attrName, options] = attr;
    // Plain string mappings carry no validation rules.
    const err = typeof options === 'object' ? validateAttr(options, value) : undefined;
    if (!err) {
      record[attrName] = value;
      continue;
    }

    if (!errors) {
      errors = [];
    }
    errors.originalData = data;
    errors.push(err);
  }

  return { errors, record };
}
/**
 * Runs an attribute's validator, if it has one, against a cell value.
 *
 * The validator is expected to expose `validate(value)` and to return an
 * object whose `error` property is truthy when validation fails.
 *
 * @param {Object} attr - Attribute options (`name`, optional `validation`).
 * @param {*} value - Cell value to validate.
 * @returns {{attrName: string, message: string}|undefined} Error details when
 *   validation fails; undefined when it passes or no validator is configured.
 */
function validateAttr(attr, value) {
  const { validation } = attr;
  if (!validation) {
    return undefined;
  }
  const { error } = validation.validate(value);
  return error ? { attrName: attr.name, message: error.message } : undefined;
}
module.exports = CSVStreamParser;
const Readable = require('stream').Readable;
const os = require('os');
const Joi = require('joi');
/**
 * Builds an in-memory readable stream that looks like a CSV file.
 *
 * Each supplied line is followed by the platform line ending (os.EOL), and
 * the stream is ended before it is returned.
 *
 * @param {Object} content
 * @param {string} [content.headers] - Header line; omitted when falsy.
 * @param {Array<string>} [content.rows] - Data lines; omitted when falsy.
 * @returns {Readable} Stream holding the assembled CSV text.
 */
function mockCSVStream({ headers, rows }) {
  const stream = new Readable();

  const writeLine = line => {
    stream.push(line);
    stream.push(os.EOL);
  };

  if (headers) {
    writeLine(headers);
  }
  if (rows) {
    rows.forEach(row => writeLine(row));
  }

  // Pushing null signals end-of-stream.
  stream.push(null);
  return stream;
}
const CSVStreamParser = require('./csvStreamParser.js');
const HeaderValidationError = require('./headerValidationError');
describe('csv stream parser', function () {
  // Schema shared by the header-normalization tests.
  const lastNameSchema = {
    lastName: 'lastname'
  };

  /**
   * Parses an in-memory CSV described by `csvContent` ({ headers, rows })
   * with a parser built from `schema`.
   */
  const parseWith = (schema, csvContent) =>
    new CSVStreamParser(schema).parse(mockCSVStream(csvContent));

  /**
   * Asserts that `promise` rejects and hands the error to `assertError`.
   *
   * The two-argument form of `then` is deliberate: with
   * `.then(() => fail()).catch(...)` anything thrown by the fulfilment
   * handler is caught by the trailing `.catch`, so a parse that wrongly
   * succeeds could still pass the test.
   */
  const expectRejection = (promise, assertError = error => expect(error).toBeDefined()) =>
    promise.then(() => {
      throw new Error('Expected parse() to reject, but it resolved');
    }, assertError);

  it('should allow headers in lowercase', function () {
    return parseWith(lastNameSchema, {
      headers: 'lastname,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow spaces in the header', function () {
    return parseWith(lastNameSchema, {
      headers: 'last name,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow slashes in the header', function () {
    return parseWith(lastNameSchema, {
      headers: 'last / name,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.lastName).toBeTruthy());
  });

  it('should allow extra spaces around headers', function () {
    return parseWith(lastNameSchema, {
      headers: ' lastName ,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.lastName).toBeTruthy());
  });

  it('should ignore extra headers', function () {
    return parseWith(lastNameSchema, {
      headers: 'lastName,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.firstName).toBe(undefined));
  });

  it('should throw an error for empty headers', function () {
    return expectRejection(
      parseWith(lastNameSchema, {
        headers: 'lastName,,firstname',
        rows: ['man,super']
      })
    );
  });

  it('should throw an error for header that contain only whitespace', function () {
    return expectRejection(
      parseWith(lastNameSchema, {
        headers: 'lastName, ,firstname',
        rows: ['man,bob,super']
      })
    );
  });

  it('should support objects for schema options', function () {
    const schema = {
      lastName: {
        name: 'lastname'
      }
    };
    return parseWith(schema, {
      headers: ' lastName ,firstname',
      rows: ['man,super']
    })
      .then(results => results[0].record)
      .then(record => expect(record.lastName).toBeTruthy());
  });

  it('should throw an error for missing required headers', function () {
    const schema = {
      lastName: {
        name: 'lastname',
        required: true
      },
      firstName: {
        name: 'firstName',
        required: true
      },
      address: 'address'
    };
    return expectRejection(
      parseWith(schema, {
        headers: 'address',
        rows: ['bleaker street']
      })
    );
  });

  it('should conditionally require headers', function () {
    const schema = {
      lastName: 'lastname',
      firstName: {
        name: 'firstname',
        required: headers => headers.includes('lastname')
      },
      address: 'address'
    };
    return parseWith(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,man,bleaker street']
    })
      .then(results => results[0].errors)
      .then(errors => expect(errors).toBeUndefined());
  });

  it('should log an error if a conditionally required header is missing', function () {
    const schema = {
      lastName: 'lastname',
      firstName: {
        name: 'firstname',
        required: headers => headers.includes('lastname')
      },
      address: 'address'
    };
    return expectRejection(
      parseWith(schema, {
        headers: 'lastname,address',
        rows: ['man,bleaker street']
      })
    );
  });

  it('should throw an error if exclusively required headers are missing', function () {
    const schema = {
      employeeId: {
        name: 'employeeid',
        required: headers => !headers.includes('tascid')
      },
      tascId: {
        name: 'tascid',
        required: headers => !headers.includes('employeeid')
      },
      address: 'address'
    };
    return expectRejection(
      parseWith(schema, {
        headers: 'address',
        rows: ['bleaker street']
      })
    );
  });

  it('should log a custom error if exclusive required headers are missing', function () {
    const expectedErrorMessage = 'TascId or employeeId is required';
    // A `required` callback may throw its own error instead of returning a flag.
    const checkRequiredHeaders = headers => {
      if (!(headers.includes('tascid') || headers.includes('employeeid'))) {
        throw new HeaderValidationError(expectedErrorMessage);
      }
    };
    const schema = {
      employeeId: {
        name: 'employeeid',
        required: checkRequiredHeaders
      },
      tascId: {
        name: 'tascid',
        required: checkRequiredHeaders
      },
      address: 'address'
    };
    return expectRejection(
      parseWith(schema, {
        headers: 'address',
        rows: ['bleaker street']
      }),
      error => expect(error.message).toBe(expectedErrorMessage)
    );
  });

  it('should return a record for each line in a file', function () {
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().length(1)
      }
    };
    return parseWith(schema, {
      headers: 'lastName,firstName',
      rows: [
        'man,super',
        'willis,bob'
      ]
    }).then(results => expect(results.length).toBe(2));
  });

  it('should log an error for missing required data', function () {
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().required()
      },
      firstName: 'firstname'
    };
    return parseWith(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,,bleaker street']
    })
      .then(results => results[0].errors)
      .then(errors => expect(errors.length).toBe(1));
  });

  it('should log a custom error message', function () {
    const expectedErrorMessage = 'Last Name is required';
    const schema = {
      lastName: {
        name: 'lastname',
        validation: Joi.string().required().error(() => expectedErrorMessage)
      },
      firstName: 'firstname'
    };
    return parseWith(schema, {
      headers: 'firstname,lastname,address',
      rows: ['super,,bleaker street']
    })
      .then(results => results[0].errors)
      .then(([error]) => expect(error.message).toBe(expectedErrorMessage));
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment