Last active
January 11, 2016 05:09
-
-
Save mjpearson/eaf0733f0c5845fd987c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
/* | |
refs [interview question](https://gist.github.com/mjpearson/0ea04a4ba886d3f02333) | |
@author michael pearson <mjpearson@gmail.com> | |
To turn on debugging, from shell : `export NODE_DEBUG=1` | |
If execute bit set on toq_answer.js, run with `cat input.txt | ./toq_answer.js` | |
otherwise... `cat input.txt | /usr/bin/env node ./toq_answer.js` | |
### Synopsis | |
This solution unpacks testing patterns into a hash table based tree struct. Each input line in the stream is then | |
tokenized from a delimiter. Iterating through the tokens updates a set of pointers, resolving a depth first tree search. | |
Handles exact matches or weighted wildcards. | |
For example, the tree from the testing file would look something like... | |
patternTree (pointer) | |
| | |
* ---- a - foo - w | |
| | | | | | | |
* b x * bar x | |
| | | | | | | |
c * y * baz * | |
| | | |
z * | |
Based on the spec, the steam appears to resolve over time after lines which resolve after 2 integer delimiters. | |
Firstly to define the pattern structure, and then to define the matched lines. | |
*/ | |
// input token delimiter | |
const DELIM_INPUT = '/'; | |
// pattern match token delimiter | |
const DELIM_PATTERN = ','; | |
// wildcard delimiter | |
const DELIM_WILDCARD = '*'; | |
// how important wildcards are for proximity. A larger value means less important | |
// ideally a fib sequence over token length | |
const WILDCARD_WEIGHT = 2; | |
var readline = require('readline'), | |
// patterns tree | |
patternTree = {}, | |
// matched patterns | |
patternMatches = {}, | |
// expected # patterns | |
numPatterns = -1, | |
// expected # inputs | |
numInputs = -1, | |
// stdin event handler | |
stdInputStream = readline.createInterface({ | |
input: process.stdin, | |
output: process.stdout, | |
terminal: false | |
}); | |
// override string trim prototype to drop lead and end slashes,spaces | |
// global matches don't cache nicely, so considering regex caching out of scope | |
String.prototype.trim = function() { | |
return this.replace(/^[\s\/]+|[\s\/]+$/g, ''); | |
} | |
// debug/log helper. passes through to console.log when env variable set | |
// To turn on debugging, from shell : `export NODE_DEBUG=1` | |
function debug() { | |
if (1 == process.env.NODE_DEBUG) { | |
console.log.call(console, arguments); | |
} | |
} | |
/* | |
* performs recursive depth-first search of a hash table, | |
* search parallelizes when it encounters a '*' tree pointer | |
* | |
* @param inputTokens array input line tokens, eg ['a', 'b', 'c'] | |
* @param treePtr object pointer into tree structure | |
* @param treePath string name of resolved tree path | |
* @param proxmityContainer object treePath > # hops map | |
*/ | |
function matchPatterns(inputTokens, treePtr, treePath, proximityContainer) { | |
debug('ARGS', arguments, Object.keys(treePtr).length); | |
// | |
// if there are input tokens, then inspect the tree | |
// | |
if (inputTokens.length) { | |
var token = inputTokens.shift(), // shift off the first token | |
exactPtr = treePtr[token], // look for an exact pointer | |
wildcardPtr = treePtr[DELIM_WILDCARD]; // look for a wildcard pointer | |
// add path delimiter to the tree path | |
if (treePath) { | |
treePath += DELIM_PATTERN; | |
} | |
/* | |
* for each of exactPtr and wildcardPtr if they exist, make a copy of | |
* the tokens array and parallelise the search for each tree pointer. | |
* | |
*/ | |
// inspect the tree path for an exact match pointer | |
if (exactPtr) { | |
debug('EXACT::GOT EXACT MATCH FOR ', token, exactPtr); | |
matchPatterns( | |
inputTokens.slice(), | |
exactPtr, | |
treePath + token, | |
proximityContainer | |
); | |
} | |
// inspect the tree path for a wildcard pointer | |
if (wildcardPtr) { | |
debug('WILDCARD::GOT WILDCARD MATCH FOR ', token); | |
matchPatterns( | |
inputTokens.slice(), | |
wildcardPtr, | |
treePath + DELIM_WILDCARD, | |
proximityContainer | |
); | |
} | |
return false; | |
} else { | |
debug('PTR LENGTH', treePtr.length) | |
// if there are no more input tokens left | |
// and this branch continues then this | |
// is a length mismatch, so bail. | |
if (treePtr && Object.keys(treePtr).length) { | |
return false; | |
} else { | |
debug('PASSING', treePath); | |
// flag the proximity container | |
if (undefined === proximityContainer[treePath]) { | |
proximityContainer[treePath] = 0; | |
// create a # hops map. Less is closer | |
// so we offset wildcards as being further away | |
matchTokens = treePath.split(DELIM_PATTERN); | |
for (var i = 0; i < matchTokens.length; i++) { | |
if (DELIM_WILDCARD === matchTokens[i]) { | |
// +WILDCARD_WEIGHT so that -1 exact matches are weighted closer | |
proximityContainer[treePath] += WILDCARD_WEIGHT; | |
} else { | |
proximityContainer[treePath]--; | |
} | |
} | |
debug(proximityContainer) | |
} | |
} | |
} | |
} | |
/* | |
* Returns the best matching pattern for a line, or false for none | |
* | |
* @param line string line to inspect, delimitered by DELIM_INPUT | |
* @return mixed string|boolean matching string or false for none | |
*/ | |
function bestMatch(line) { | |
var container = {}, | |
bestMatch = false; | |
// start matching from the top level, global tree container | |
matchPatterns( | |
line.split(DELIM_INPUT), | |
patternTree, | |
'', | |
container | |
); | |
debug('CONTAINER ', container); | |
// Container has a collection of matches, keyed to their 'exactness' proximity | |
// so iterate through the container to find the first lowest weighted pattern | |
for (var k in container) { | |
if (container.hasOwnProperty(k)) { | |
if (!bestMatch || container[k] < container[bestMatch]) { | |
bestMatch = k; | |
} | |
} | |
} | |
debug('BEST MATCH ', bestMatch); | |
return bestMatch; | |
} | |
/* | |
* standard input stream handler. | |
* builtin, for every line, a 'line' event is emitted by the stream | |
* | |
*/ | |
stdInputStream.on('line', function processLine(line) { | |
var lineInt, | |
patternTokens, | |
testTokens, | |
matchedPattern = '', | |
token, | |
treePtr = patternTree; | |
// normalize line | |
line = line.trim(); | |
// skip empty lines | |
if (!line) { | |
return; | |
} | |
// find line number | |
lineInt = parseInt(line); | |
// looks like we've got a # patterns definition? then expect # inputs | |
if (!isNaN(lineInt) && -1 === numPatterns) { | |
numPatterns = lineInt; | |
// looks like we've got a # inputs definition? then end patterns and expect # inputs | |
} else if (!isNaN(lineInt) && -1 === numInputs) { | |
numInputs = lineInt; | |
numPatterns = -1; | |
} else { | |
// build pattern tree from stream | |
if (--numPatterns > -1) { | |
patternMatches[line] = false; | |
patternTokens = line.split(DELIM_PATTERN); | |
// exploit javascripts array-as-object-ness (?) | |
// to create a tree struct. pointers to empty arrays | |
// are leafs | |
for (var i = 0; i < patternTokens.length; i++) { | |
token = patternTokens[i]; | |
if (undefined == treePtr[token]) { | |
treePtr[token] = []; | |
} | |
treePtr = treePtr[token]; | |
} | |
// find best matching patterns from input stream section | |
} else if (--numInputs > -1) { | |
debug('MATCHING LINE ', numInputs, line) | |
matchedPattern = bestMatch(line, patternTree, ''); | |
console.log(matchedPattern || 'NO MATCH'); | |
} | |
} | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment