Skip to content

Instantly share code, notes, and snippets.

@fictorial
Created November 13, 2009 16:54
Show Gist options
  • Save fictorial/233975 to your computer and use it in GitHub Desktop.
Save fictorial/233975 to your computer and use it in GitHub Desktop.
// Process files in optimally-sized chunks without
// reading the entire file into memory.
var posix = require("posix");
/**
 * Error type reported when a file cannot be opened (or stat'ed) for reading.
 * Exported on the module object. Inherits from Error so that
 * `e instanceof Error`, `e.name` and `e.stack` behave as callers expect.
 * @param path The path of the file that could not be opened.
 * @param message Optional description; defaults to "failed to open file".
 */
this.FileOpenError = function FileOpenError(path, message) {
  this.message = message || "failed to open file";
  this.path = path;
  // Record the stack at the construction site rather than inheriting the
  // one captured when the prototype object below was created.
  if (typeof Error.captureStackTrace === "function")
    Error.captureStackTrace(this, FileOpenError);
  else
    this.stack = (new Error(this.message)).stack;
};
this.FileOpenError.prototype = new Error();
this.FileOpenError.prototype.constructor = this.FileOpenError;
this.FileOpenError.prototype.name = "FileOpenError";
/**
 * Error type reported when reading from an opened file fails.
 * Exported on the module object. Inherits from Error so that
 * `e instanceof Error`, `e.name` and `e.stack` behave as callers expect.
 * @param path The path of the file that could not be read.
 * @param message Optional description; defaults to "failed to read from file".
 */
this.FileReadError = function FileReadError(path, message) {
  this.message = message || "failed to read from file";
  this.path = path;
  // Record the stack at the construction site rather than inheriting the
  // one captured when the prototype object below was created.
  if (typeof Error.captureStackTrace === "function")
    Error.captureStackTrace(this, FileReadError);
  else
    this.stack = (new Error(this.message)).stack;
};
this.FileReadError.prototype = new Error();
this.FileReadError.prototype.constructor = this.FileReadError;
this.FileReadError.prototype.name = "FileReadError";
/**
 * Holds the settings for reading one file: where it lives and how its
 * contents should be decoded.
 * @param path The path to the file.
 * @param encoding The file's encoding; any falsy value selects "utf8".
 */
function FileReader (path, encoding) {
  var chosen_encoding = encoding ? encoding : "utf8";
  this.path = path;
  this.encoding = chosen_encoding;
}
/**
 * Factory exported on the module object: builds a FileReader for a file.
 * @param path The path to the file.
 * @param encoding The file's encoding; "utf8" is used when omitted.
 * @return A new FileReader configured with the given path and encoding.
 */
this.create_reader = function (path, encoding) {
  var reader = new FileReader(path, encoding);
  return reader;
};
/**
 * Opens the file for reading and reads it sequentially, one block at a time.
 *
 * The file is stat'ed synchronously up front (to learn its size and preferred
 * block size); opening and reading are asynchronous. Each read is issued only
 * after the previous one has completed, and the descriptor is closed once the
 * whole file has been read or a read has failed.
 *
 * @return An event emitter that emits the following events:
 *
 * "read" data for each successful read operation
 * "open_error" FileOpenError failed to open file
 * "read_error" FileReadError failed to read from file
 * "progress" number in [0,1] every so often
 * "complete" nothing when the file has been processed
 *
 * @param progress_hz Emit "progress" every progress_hz reads (plus once at
 * the start with 0 and once at the end with 1); a falsy value disables
 * "progress" events.
 * @throw Error when this reader is already processing a file.
 * @throw FileOpenError when the path is missing, cannot be stat'ed, or is
 * not a regular file.
 * @throw FileReadError when the file is empty.
 */
FileReader.prototype.process = function (progress_hz) {
  if (this.fd)
    throw new Error("already processing a file");

  // The error constructors live on the module's exports, not on FileReader
  // instances, so they must not be looked up through `this` here.
  var FileOpenError = exports.FileOpenError;
  var FileReadError = exports.FileReadError;

  // Captured up front: `this` is not the reader inside the callbacks below.
  var reader = this;
  var path = this.path;
  var encoding = this.encoding;

  if (!path)
    throw new FileOpenError(path, "missing path");

  var stats;
  try {
    stats = posix.stat(path).wait(); // blocks; hmm could be ugly otherwise
  } catch (e) {
    // Deliberately ignored: a failed stat leaves `stats` unset and is
    // reported as a FileOpenError just below.
  }
  if (!stats)
    throw new FileOpenError(path, "failed to stat file");
  if (!stats.isFile())
    throw new FileOpenError(path, "given path is not a file");

  var file_size = stats.size;
  if (file_size <= 0)
    throw new FileReadError(path, "empty");

  var block_size = stats.blksize;
  if (!(block_size > 0)) // also covers a missing/undefined blksize
    block_size = 4096; // common in practice

  var emitter = new process.EventEmitter();

  posix.open(path, process.O_RDONLY, 0)
    .addErrback(function () {
      emitter.emit("open_error", new FileOpenError(path));
    })
    .addCallback(function (fd) {
      var bytes_processed = 0;
      var reads = 0;
      var finished = false;

      // Makes the "already processing" guard effective while reads are
      // in flight.
      reader.fd = fd;

      // Closes the descriptor exactly once; returns false if already done.
      function release () {
        if (finished)
          return false;
        finished = true;
        reader.fd = null;
        posix.close(fd); // best effort; a close failure is not reported
        return true;
      }

      function fail () {
        if (release())
          emitter.emit("read_error", new FileReadError(path));
      }

      function complete () {
        if (!release())
          return;
        if (progress_hz)
          emitter.emit("progress", 1);
        emitter.emit("complete");
      }

      // Issues a single read and schedules the next one from its callback.
      // A synchronous loop here would never let the read callbacks run.
      function read_next () {
        if (bytes_processed >= file_size) {
          complete();
          return;
        }
        var pending;
        try {
          pending = posix.read(fd, block_size, bytes_processed, encoding);
        } catch (e) {
          fail();
          return;
        }
        pending
          .addErrback(fail)
          .addCallback(function (data, bytes_read) {
            if (!(bytes_read > 0)) {
              // End of file reached earlier than stat reported (e.g. the
              // file shrank); stop instead of re-reading forever.
              complete();
              return;
            }
            bytes_processed += bytes_read;
            emitter.emit("read", data);
            if (progress_hz && ++reads % progress_hz === 0)
              emitter.emit("progress", Math.min(1, bytes_processed / file_size));
            read_next();
          });
      }

      if (progress_hz)
        emitter.emit("progress", 0);
      read_next();
    });

  return emitter;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment