Skip to content

Instantly share code, notes, and snippets.

@amatiasq
Created November 9, 2018 11:10
Show Gist options
  • Save amatiasq/869b94fe78afc7ecd197f659ed8736d6 to your computer and use it in GitHub Desktop.
Save amatiasq/869b94fe78afc7ecd197f659ed8736d6 to your computer and use it in GitHub Desktop.
CSV Parsers

CSV Parsers

This provides a set of parser classes for CSV to and from JSON. If the debug library is available (npm i debug) you can see debug information with DEBUG='CSV*' environment variable.

CSV Reader

This will read a CSV file and return JSON objects from it. It will use the first row in the CSV file to create the properties in the result object.

Usage

(async () => {
  const reader = new CSVReader('input.csv', {/* options */});
  await reader.open();
  
  while (!reader.isOver) {
    const entry = await reader.readNext();
    
    // It is possible in certain cases that the last returned entry is null
    if (!entry) break;
    
    console.log(entry.myNamedColumn);
  }
  
  await reader.close();
})();

Options

  • chunkSize: Number of bytes to read in each chunk. Defaults to 1 * 1024 * 1024 (1 MiB).
  • separator: Column separator in CSV file. Defaults to comma (,).
  • rowSeparator: Row separator in CSV file. Defaults to line break (\n).

CSV Writter

This will write a CSV file from the JSON objects passed to it. It will use the properties of the first JSON object as the columns of the CSV file. Any property that does not exist in the first object passed will not be added to the CSV file!

Usage

(async () => {
  const writter = new CSVWritter('output.csv', {/* options */});
  await writter.open();
  
  for (let i = 0; i < 100000; i++) {
    await writter.write({
      id: i,
      otherColumn: 'test',
    });
  }
  
  await writter.close();
})();

Options

  • separator: Column separator in CSV file. Defaults to comma (,).
  • rowSeparator: Row separator in CSV file. Defaults to line break (\n).
const fs = require('fs');
const { StringDecoder } = require('string_decoder');
const { promisify } = require('util');
const fs_open = promisify(fs.open);
const fs_read = promisify(fs.read);
const fs_close = promisify(fs.close);
// `debug` is an optional dependency. When it can be required, it is used as the
// logger factory; otherwise a stand-in factory is installed whose loggers do nothing.
let debug;
try {
debug = require('debug');
} catch(error) {
// The module could not be loaded: fall back to a factory that returns a no-op logger.
debug = () => () => {};
}
module.exports = class CSVReader {
constructor(filename, {
chunkSize = 1 * 1024 * 1024,
separator = ',',
rowSeparator = '\n',
} = {}) {
this.log = debug(`CSVReader "${filename}"`)
this.chunkSize = chunkSize;
this.separator = separator;
this.rowSeparator = rowSeparator;
this.file = filename;
this.buffer = null;
this.cursor = 0;
this.fd = null;
this.rest = '';
this.header = null;
this._isOver = false;
}
get isOver() {
return this._isOver;
}
async open() {
this.fd = await fs_open(this.file, 'r');
this.buffer = Buffer.alloc(this.chunkSize);
this.log('Open.');
}
async _readHeader() {
const line = await this._getNextLine();
this.header = line.split(this.separator);
this.log(`Columns: ${this.header}`);
return line !== '';
}
async readNext() {
if (!this.header) {
if (!await this._readHeader()) {
this.log('File is empty. Returning null.');
return null;
}
}
const line = await this._getNextLine();
if (!line && this._isOver) {
this.log('File is over. Returning null.');
return null;
}
const cells = line.split(this.separator);
const result = {};
for (let i = 0; i < this.header.length; i++) {
result[this.header[i]] = cells[i];
}
return result;
}
async _getNextLine() {
if (this._isOver) {
throw new Error(`Can't read! file is over: "${this.file}"`);
}
const buffer = this.buffer;
let content = this.rest;
while (!content.includes(this.rowSeparator)) {
const { bytesRead: bytes } = await fs_read(this.fd, buffer, 0, this.chunkSize, this.cursor);
this.cursor += bytes;
this.log(`Reading ${bytes} bytes.`);
if (bytes === 0) {
this.close();
this.rest = '';
return content;
}
if (bytes < this.chunkSize) {
this.close();
content += buffer.slice(0, bytes).toString();
} else {
content += buffer.toString();
}
}
const index = content.indexOf(this.rowSeparator);
if (index === -1) {
this.rest = '';
return content;
}
this.rest = content.slice(index + 1);
return content.slice(0, index);
}
async close() {
if (this._isOver) {
return;
}
this.log('Closed.');
this._isOver = true;
return fs_close(this.fd);
}
};
const fs = require('fs');
// `debug` is an optional dependency. When it can be required, it is used as the
// logger factory; otherwise a stand-in factory is installed whose loggers do nothing.
let debug;
try {
debug = require('debug');
} catch(error) {
// The module could not be loaded: fall back to a factory that returns a no-op logger.
debug = () => () => {};
}
module.exports = class CSVWritter {
constructor(filename, {
separator = ',',
rowSeparator = '\n',
} = {}) {
this.log = debug(`CSVWritter "${filename}"`);
this.separator = separator;
this.rowSeparator = rowSeparator;
this.file = filename;
this.header = null;
this.stream = null;
this._isClosed = false;
}
async open() {
this.stream = fs.createWriteStream(this.file);
this.log('Open.');
}
async _writeHeader(row) {
this.header = Object.keys(row);
this.log(`Columns: ${this.header}.`);
await this.stream.write(`${this.header.join(this.separator)}${this.rowSeparator}`);
}
async write(row) {
if (!this.header) {
await this._writeHeader(row);
}
const cells = [];
for (const col of this.header) {
cells.push(row[col]);
}
const line = cells.join(this.separator);
return this.stream.write(`${line}${this.rowSeparator}`);
}
async close() {
if (this._isClosed) {
return null;
}
this.stream.close();
this._isClosed = true;
this.log('Closed.');
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment