Skip to content

Instantly share code, notes, and snippets.

@trentm
Created August 1, 2012 06:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trentm/3224233 to your computer and use it in GitHub Desktop.
Save trentm/3224233 to your computer and use it in GitHub Desktop.
Index: /Users/trentm/tm/json/lib/jsontool.js
index 734729c..d2764f9 100755
--- a/lib/jsontool.js
+++ b/lib/jsontool.js
@@ -399,36 +399,140 @@ function parseArgv(argv) {
* Get input from either given file paths or stdin.
*
* @param opts {Object} Parsed options.
- * @param callback {Function} `function (err, callback)` where err is an
- * error string if there was a problem.
+ * @param callback {Function} `function (err, chunk)` where err is an
+ * error string if there was a problem. This is called once for each
+ * "chunk". XXX START HERE: splain chunks
*/
-function getInput(opts, callback) {
- if (opts.inputFiles.length === 0) {
- // Read from stdin.
- var chunks = [];
-
- var stdin = process.openStdin();
- stdin.setEncoding('utf8');
- stdin.on('data', function (chunk) {
- chunks.push(chunk);
- });
+function jsonChunksFromInput(opts, callback) {
+ // If returned from `stripHeaders()` and `finishedHeaders` is still false,
+ // then we've processed a chunk with an incomplete set of headers:
+ // `stripHeaders()` should be called again with the next chunk.
+ var finishedHeaders = false;
+ function stripHeaders(s) {
+ // Take off a leading HTTP header if any and pass it through.
+ while (true) {
+ if (s.slice(0,5) === "HTTP/") {
+ var index = s.indexOf('\r\n\r\n');
+ var sepLen = 4;
+ if (index == -1) {
+ index = s.indexOf('\n\n');
+ sepLen = 2;
+ }
+ if (index != -1) {
+ if (! opts.dropHeaders) {
+ emit(s.slice(0, index+sepLen));
+ }
+ var is100Continue = (s.slice(0, 21) === "HTTP/1.1 100 Continue");
+ s = s.slice(index+sepLen);
+ if (is100Continue) {
+ continue;
+ }
+ finishedHeaders = true;
+ }
+ } else {
+ finishedHeaders = true;
+ }
+ break;
+ }
+ //console.warn("XXX stripHeaders done, finishedHeaders=%s", finishedHeaders)
+ return s;
+ }
- stdin.on('end', function () {
- callback(null, chunks.join(''));
- });
- } else {
+ if (opts.inputFiles.length > 0) {
// Read input files.
+ // TODO: Improve streaming here: read files async in chunks and stream
+ // as above if `-ga`.
var i = 0;
var chunks = [];
try {
- for (; i < opts.inputFiles.length; i++) {
- chunks.push(fs.readFileSync(opts.inputFiles[i], 'utf8'));
+ var first = fs.readFileSync(opts.inputFiles[i], 'utf8');
+ first = stripHeaders(first);
+ callback(null, first);
+ for (i++; i < opts.inputFiles.length; i++) {
+ callback(null, fs.readFileSync(opts.inputFiles[i], 'utf8'));
}
} catch (e) {
return callback(
format('could not read "%s": %s', opts.inputFiles[i], e));
}
- callback(null, chunks.join(''));
+ } else if (opts.group && opts.array && opts.outputMode !== OM_JSON) {
+ // Streaming from stdin.
+ //console.warn("XXX streaming");
+ var streaming = true;
+ var leftover = '';
+ var chunks = [];
+ var splitter = /(})(\s*\n\s*)?({\s*")/;
+ function callbackJsonChunks(chunk) {
+ if (chunk[0] !== '{') { // Only support streaming consecutive *objects*.
+ streaming = false;
+ chunks.push(chunk);
+ return;
+ }
+ /* Example:
+ * > '{"a":"b"}\n{"a":"b"}\n{"a":"b"}'.split(/(})(\s*\n\s*)?({\s*")/)
+ * [ '{"a":"b"',
+ * '}',
+ * '\n',
+ * '{"',
+ * 'a":"b"',
+ * '}',
+ * '\n',
+ * '{"',
+ * 'a":"b"}' ]
+ */
+ var bits = chunk.split(splitter);
+ //console.warn("XXX bits: ", bits)
+ if (bits.length === 1) {
+ leftover = chunk;
+ } else {
+ var n = bits.length - 2;
+ callback(null, bits[0] + bits[1]);
+ for (var i = 3; i < n; i += 4) {
+ callback(null, bits[i] + bits[i+1] + bits[i+2]);
+ }
+ leftover = bits[n] + bits[n+1];
+ }
+ }
+
+ var stdin = process.openStdin();
+ stdin.setEncoding('utf8');
+ stdin.on('data', function (chunk) {
+ //console.warn("XXX process chunk: %s", JSON.stringify(chunk))
+ if (!streaming) {
+ chunks.push(chunk);
+ return;
+ }
+ var s = leftover + chunk;
+ if (!finishedHeaders) {
+ s = stripHeaders(s);
+ }
+ if (!finishedHeaders) {
+ leftover = s;
+ } else {
+ callbackJsonChunks(s);
+ }
+ });
+ stdin.on('end', function () {
+ if (!streaming) {
+ callback(null, chunks.join(''));
+ } else if (leftover) {
+ callbackJsonChunks(leftover);
+ callback(null, leftover);
+ }
+ });
+ } else {
+ // Read stdin in one big chunk.
+ var stdin = process.openStdin();
+ stdin.setEncoding('utf8');
+ var chunks = [];
+ stdin.on('data', function (chunk) {
+ chunks.push(chunk);
+ });
+ stdin.on('end', function () {
+ var chunk = chunks.join(''); // All of stdin, read as one big chunk.
+ chunk = stripHeaders(chunk); // Emit/drop any leading HTTP header block.
+ callback(null, chunk); // Pass the header-stripped text, not the raw join.
+ });
}
}
@@ -590,6 +694,7 @@ function parseInput(buffer, group, merge) {
// This condition should be fine for typical use cases and ensures
// no false matches inside JS strings.
var newBuffer = buffer;
+ //XXX START HERE
[/(})\s*\n\s*({)/g, /(})({")/g].forEach(function (pat) {
newBuffer = newBuffer.replace(pat, "$1,\n$2");
});
@@ -820,55 +925,33 @@ function main(argv) {
}
var lookupStrs = opts.args;
- getInput(opts, function (err, buffer) {
+ jsonChunksFromInput(opts, function (err, chunk) {
+ //console.warn("XXX chunk: '%s'", chunk)
if (err) {
warn("json: error: %s", err)
return drainStdoutAndExit(1);
}
- // Take off a leading HTTP header if any and pass it through.
- while (true) {
- if (buffer.slice(0,5) === "HTTP/") {
- var index = buffer.indexOf('\r\n\r\n');
- var sepLen = 4;
- if (index == -1) {
- index = buffer.indexOf('\n\n');
- sepLen = 2;
- }
- if (index != -1) {
- if (! opts.dropHeaders) {
- emit(buffer.slice(0, index+sepLen));
- }
- var is100Continue = (buffer.slice(0, 21) === "HTTP/1.1 100 Continue");
- buffer = buffer.slice(index+sepLen);
- if (is100Continue) {
- continue;
- }
- }
- }
- break;
- }
-
// Expect the remainder to be JSON.
- if (! buffer.length) {
+ if (! chunk.length) {
return;
}
// parseInput() -> {datum: <input object>, error: <error object>}
- var input = parseInput(buffer, opts.group, opts.merge);
+ var input = parseInput(chunk, opts.group, opts.merge);
if (input.error) {
// Doesn't look like JSON. Just print it out and move on.
if (! opts.quiet) {
// Use JSON-js' "json_parse" parser to get more detail on the
// syntax error.
var details = "";
- var normBuffer = buffer.replace(/\r\n|\n|\r/, '\n');
+ var normBuffer = chunk.replace(/\r\n|\n|\r/, '\n');
try {
json_parse(normBuffer);
details = input.error;
} catch(err) {
// err.at has the position. Get line/column from that.
var at = err.at - 1; // `err.at` looks to be 1-based.
- var lines = buffer.split('\n');
+ var lines = chunk.split('\n');
var line, col, pos = 0;
for (line = 0; line < lines.length; line++) {
pos += lines[line].length + 1;
@@ -887,8 +970,8 @@ function main(argv) {
warn("json: error: input is not JSON: %s", details);
}
if (!opts.validate) {
- emit(buffer);
- if (buffer.length && buffer[buffer.length-1] !== "\n") {
+ emit(chunk);
+ if (chunk.length && chunk[chunk.length-1] !== "\n") {
emit('\n');
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment