Skip to content

Instantly share code, notes, and snippets.

@mikedeboer
Last active August 29, 2015 14:01
Show Gist options
  • Save mikedeboer/685239acc409542af48a to your computer and use it in GitHub Desktop.
Save mikedeboer/685239acc409542af48a to your computer and use it in GitHub Desktop.
Node.JS wrapped GREP search command builder... ack on steroids, if you will!
#!/usr/bin/env node
/* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/publicdomain/zero/1.0/ */
"use strict";
var Fs = require("fs");
var Path = require("path");
var Spawn = require("child_process").spawn;
/**
 * Backslash-escape every regular-expression metacharacter in a string
 * (including the "/" delimiter), so the result can be embedded in a pattern
 * and match the input literally.
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function escapeRegExp(str) {
    var metaChars = /[.*+?\^${}()|\[\]\/\\]/g;
    return str.replace(metaChars, function(ch) {
        return "\\" + ch;
    });
}
/**
 * Escape a literal search string for use as a grep pattern: regex
 * metacharacters, commas, "#", whitespace and both quote characters each get
 * a leading backslash.
 *
 * The character set is taken from http://xregexp.com/
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function grepEscapeRegExp(str) {
    var needsEscape = /[[\]{}()*+?.,\\^$|#\s"']/g;
    return str.replace(needsEscape, function(match) {
        return "\\" + match;
    });
}
/**
 * Put a backslash in front of every character that the shell treats
 * specially: backslash, both quote characters, backtick, "$", whitespace,
 * parentheses and angle brackets.
 *
 * @param {String} str text to escape
 * @returns {String} the escaped text
 */
function escapeShell(str) {
    return str.replace(/[\\"'`$\s()<>]/g, "\\$&");
}
/**
 * Strip duplicate values from an array IN PLACE, keeping the first occurrence
 * of every value and the original relative order.
 *
 * Values are compared the way Array#indexOf compares them (strict equality).
 *
 * @param {Array} arr the array to de-duplicate; it is modified
 * @returns {Array} the very same array instance, now without duplicates
 */
function makeUnique(arr) {
    var firstSeen = [];
    var total = arr.length;
    var idx = 0;
    while (idx < total) {
        var candidate = arr[idx++];
        if (firstSeen.indexOf(candidate) < 0) {
            firstSeen.push(candidate);
        }
    }
    // Empty the caller's array and refill it, so existing references stay valid.
    arr.length = 0;
    firstSeen.forEach(function(value) {
        arr.push(value);
    });
    return arr;
}
/**
 * Removes trailing characters from a string (whitespace by default).
 * version: 1107.2516
 * from: http://phpjs.org/functions/rtrim
 * original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
 * example 1: rtrim(' Kevin van Zonneveld ');
 * returns 1: ' Kevin van Zonneveld'
 *
 * @param {*} str value to trim; it is converted to a string first
 * @param {String} [charlist] the characters to strip from the end. Every
 *   character is taken literally. When omitted, spaces, any other whitespace
 *   and non-breaking spaces are stripped.
 * @returns {String} the string without the trailing characters
 */
function rtrim(str, charlist) {
    // The characters end up inside a [...] character class, where only
    // "\", "]", "^" and "-" carry a special meaning. All four must be escaped:
    // an unescaped "\" can produce an invalid expression, and an unescaped "-"
    // silently turns e.g. "a-c" into a range.
    var chars = !charlist
        ? " \\s\u00A0"
        : String(charlist).replace(/[\\\]\^\-]/g, "\\$&");
    var trailing = new RegExp("[" + chars + "]+$");
    return String(str).replace(trailing, "");
}
// Set to 1 to print diagnostic "DEBUG::" lines while the script runs.
var DEBUG = 0;

// Directory names that are excluded from the search, mapped to the tool
// each of them belongs to. Only the keys are used to build patterns; the
// values serve as documentation.
var IGNORE_DIRS = {
    ".bzr": "Bazaar",
    ".cdv": "Codeville",
    "~.dep": "Interface Builder",
    "~.dot": "Interface Builder",
    "~.nib": "Interface Builder",
    "~.plst": "Interface Builder",
    ".git": "Git",
    ".hg": "Mercurial",
    ".pc": "quilt",
    ".svn": "Subversion",
    "_MTN": "Monotone",
    "blib": "Perl module building",
    "CVS": "CVS",
    "RCS": "RCS",
    "SCCS": "SCCS",
    "_darcs": "darcs",
    "_sgbak": "Vault/Fortress",
    "autom4te.cache": "autoconf",
    "cover_db": "Devel::Cover",
    "_build": "Module::Build"
};
// File type name -> list of file name suffixes (without a leading dot) that
// belong to that type. The same suffix may appear under several types.
// The "binary" and "skipped" pseudo-types carry no suffix list and are
// therefore not part of this table.
var MAPPINGS = {
    "actionscript": ["as", "mxml"],
    "ada": ["ada", "adb", "ads"],
    "asm": ["asm", "s"],
    "batch": ["bat", "cmd"],
    "cc": ["c", "h", "xs"],
    "cfmx": ["cfc", "cfm", "cfml"],
    "clojure": ["clj"],
    "cpp": ["cpp", "cc", "cxx", "m", "hpp", "hh", "h", "hxx"],
    "csharp": ["cs"],
    "css": ["css", "less", "scss", "sass"],
    "coffee": ["coffee"],
    "elisp": ["el"],
    "erlang": ["erl", "hrl"],
    "fortran": ["f", "f77", "f90", "f95", "f03", "for", "ftn", "fpp"],
    "haskell": ["hs", "lhs"],
    "hh": ["h"],
    "html": ["htm", "html", "shtml", "xhtml"],
    "jade": ["jade"],
    "java": ["java", "properties"],
    "groovy": ["groovy"],
    "js": ["js"],
    "json": ["json"],
    "latex": ["latex", "ltx"],
    "jsp": ["jsp", "jspx", "jhtm", "jhtml"],
    "lisp": ["lisp", "lsp"],
    "lua": ["lua"],
    "make": ["makefile", "Makefile"],
    "mason": ["mas", "mhtml", "mpl", "mtxt"],
    "markdown": ["md", "markdown"],
    "objc": ["m", "h"],
    "objcpp": ["mm", "h"],
    "ocaml": ["ml", "mli"],
    "parrot": ["pir", "pasm", "pmc", "ops", "pod", "pg", "tg"],
    "perl": ["pl", "pm", "pod", "t"],
    "php": ["php", "phpt", "php3", "php4", "php5", "phtml"],
    "plone": ["pt", "cpt", "metadata", "cpy", "py"],
    "powershell": ["ps1"],
    "python": ["py"],
    "rake": ["rakefile"],
    "ruby": ["rb", "ru", "rhtml", "rjs", "rxml", "erb", "rake", "gemspec"],
    "scala": ["scala"],
    "scheme": ["scm", "ss"],
    "shell": ["sh", "bash", "csh", "tcsh", "ksh", "zsh"],
    "smalltalk": ["st"],
    "sql": ["sql", "ctl"],
    "tcl": ["tcl", "itcl", "itk"],
    "tex": ["tex", "cls", "sty"],
    "text": ["txt"],
    "textile": ["textile"],
    "tt": ["tt", "tt2", "ttml"],
    "vb": ["bas", "cls", "frm", "ctl", "vb", "resx"],
    "vim": ["vim"],
    "yaml": ["yaml", "yml"],
    "xml": ["xml", "dtd", "xslt", "ent", "rdf", "rss", "svg", "wsdl", "atom", "mathml", "mml"]
};
// Every file name suffix known to MAPPINGS, flattened into a single list.
// makeUnique() below removes the duplicates in place.
var exts = [];
Object.keys(MAPPINGS).forEach(function(typeName) {
    exts = exts.concat(MAPPINGS[typeName]);
});
// grep pattern matching for extensions: a comma-separated list that is placed
// inside shell braces when the command line is built.
var PATTERN_EXT = makeUnique(exts).join(",");

// Names of the directories to leave out of a search.
var dirs = makeUnique(Object.keys(IGNORE_DIRS));
// Regular expression source that matches any one ignored directory name.
// Each name is escaped on its own and only then joined: escaping the joined
// string would also escape the "|" separators and turn the alternation into
// one long literal.
var PATTERN_DIR = dirs.map(function(dirName) {
    return escapeRegExp(dirName);
}).join("|");
// The same names as a comma-separated list for shell brace expansion.
var PATTERN_EDIR = dirs.join(",");

// Executables used to build the search / replace command line.
var GREP_CMD = "grep";
var PERL_CMD = "perl";
// Last file name that was printed; used to group result lines per file.
var PREVIOUS_FILE;
/**
 * Declaration of every command line option, keyed by its long name.
 *
 * Fields of an entry:
 *   short - one-letter alias; must be unique across all entries because the
 *           aliases are collected into a single lookup table
 *   type  - "boolean" (a flag without a value), "string" or "number"
 *   def   - value used when the option is not given
 *   index - position of the option when it is passed without a name
 *   req   - true when the option must be provided
 *   desc  - text shown in the usage output
 */
var ARGS_MAP = {
    casesensitive: {
        short: "i",
        type: "boolean",
        def: false,
        desc: "whether to match strings sensitive to upper or lower case characters"
    },
    pattern: {
        short: "p",
        type: "string",
        desc: "a comma-separated list of file patterns to include in the search"
    },
    maxresults: {
        short: "m",
        type: "number",
        desc: "the maximum number of search results to return"
    },
    wholeword: {
        short: "w",
        type: "boolean",
        def: false,
        desc: "only match strings that are delimited by non-word characters"
    },
    query: {
        short: "q",
        type: "string",
        index: 0,
        req: true,
        desc: "string to search for. The needle. Put quotes around queries with multiple words or regular expressions"
    },
    // No short alias here: "p" belongs to `pattern`, and a second entry
    // claiming the same letter would take it over in the alias table. The
    // path is a positional argument, so it does not need one.
    path: {
        type: "string",
        def: process.cwd(),
        index: 1,
        desc: "specific path to use as the root of the search operation. Defaults to the current working dir"
    },
    replaceAll: {
        short: "a",
        type: "boolean",
        def: false,
        desc: "during a replace action, replace all occurrences"
    },
    replace: {
        short: "r",
        type: "string",
        def: false,
        desc: "string to replace a match with"
    },
    regexp: {
        short: "x",
        type: "boolean",
        def: false,
        desc: "whether [query] should be interpreted as a regular expression"
    }
};
// Lookup tables derived from ARGS_MAP:
//   ARGS_MAP_SHORT   - short alias -> long option name
//   ARGS_MAP_INDEXED - option definitions that can be passed by position
//   ARG_NAMES        - all long option names, in declaration order
var ARGS_MAP_SHORT = {};
var ARGS_MAP_INDEXED = [];
var ARG_NAMES = Object.keys(ARGS_MAP);
ARG_NAMES.forEach(function(optionName) {
    var spec = ARGS_MAP[optionName];
    // Every definition learns its own name, so it can be passed around alone.
    spec.name = optionName;
    if (spec.short) {
        ARGS_MAP_SHORT[spec.short] = optionName;
    }
    if (typeof spec.index != "number") {
        return;
    }
    var position = spec.index;
    var usePosition = position >= 0 || position < ARGS_MAP_INDEXED.length - 1;
    // Insert at the declared position; otherwise append to the end.
    ARGS_MAP_INDEXED.splice(usePosition ? position : Infinity, 0, spec);
});
/**
 * Build the one-line description of a single option for the usage output.
 *
 * @param {Object} arg option definition (reads `short`, `name`, `desc`,
 *   `req` and `def`)
 * @param {Boolean} [indexed] true when the option is positional; the
 *   "-s/ --" prefix is left out in that case
 * @returns {String} name, two tabs, the description and a marker that says
 *   whether the option is required, has a default or is optional
 */
function usageDef(arg, indexed) {
    var line = "";
    if (!indexed) {
        if (arg.short) {
            line += "-" + arg.short + "/ ";
        }
        line += "--";
    }
    line += arg.name + "\t\t";
    if (arg.desc) {
        line += arg.desc;
    }
    if (arg.req) {
        return line + " [required]";
    }
    if (typeof arg.def != "undefined") {
        line += " [default=" + String(arg.def) + "]";
    } else {
        line += " [optional]";
    }
    return line + ".";
}
/**
 * Print the command synopsis followed by one description line per option,
 * then terminate the process with exit code 1.
 *
 * Named options are listed alphabetically. Positional options come last in
 * the synopsis and first (in reverse positional order) in the descriptions.
 */
function usage() {
    var named = ARG_NAMES.slice().sort();
    var positional = [];
    ARGS_MAP_INDEXED.forEach(function(entry) {
        // Positional options are documented separately from the named ones.
        named.splice(named.indexOf(entry.name), 1);
        positional.push(entry.name);
    });

    var synopsis = named.map(function(optionName) {
        var spec = ARGS_MAP[optionName];
        var alias = spec.short ? "-" + spec.short + "|" : "";
        var value = spec.type == "boolean" ? "" : " " + spec.type;
        return "[" + alias + "--" + optionName + value + "]";
    }).concat(positional.map(function(optionName) {
        return ARGS_MAP[optionName].req ? optionName : "[" + optionName + "]";
    }));

    var descriptions = positional.map(function(optionName) {
        return usageDef(ARGS_MAP[optionName], true);
    }).reverse().concat(named.map(function(optionName) {
        return usageDef(ARGS_MAP[optionName]);
    }));

    console.log("Usage: " + "cs " + synopsis.join(" "));
    console.log("\n\n" + descriptions.join("\n"));
    process.exit(1);
}
/**
 * Turn the command line into an options object keyed by the long option
 * names of ARGS_MAP.
 *
 * Arguments are read from left to right:
 *   - a token that starts with "-" and names a known option (long name, or a
 *     short alias resolved through ARGS_MAP_SHORT) sets that option; boolean
 *     options become `true`, all others take the following token as value;
 *   - every other token - including a dash-prefixed one that matches no
 *     option, so that such strings can still be searched for - fills the
 *     next positional option of ARGS_MAP_INDEXED that has no value yet;
 *   - surplus positional tokens are ignored.
 *
 * Options that were not given receive their declared default, `false` for
 * booleans, or `null`. Values are passed through as strings. When no
 * arguments are given, or a required option is missing, the usage text is
 * shown (usage() terminates the process).
 *
 * process.argv itself is left untouched.
 *
 * @returns {Object} the parsed options
 */
function parseArgs() {
    var options = {};
    // Skip 'node' and the script path; work on a copy of the list.
    var argv = process.argv.slice(2);
    if (!argv.length)
        return usage();

    var hasOwn = Object.prototype.hasOwnProperty;
    var nextIndexed = 0;
    for (var i = 0; i < argv.length; ++i) {
        var token = String(argv[i]);
        var spec = null;
        if (token.charAt(0) == "-") {
            var name = token.replace(/^-+/, "");
            // Own-property checks keep names like "constructor" from matching.
            if (hasOwn.call(ARGS_MAP, name))
                spec = ARGS_MAP[name];
            else if (hasOwn.call(ARGS_MAP_SHORT, name))
                spec = ARGS_MAP[ARGS_MAP_SHORT[name]];
        }

        if (!spec) {
            // Un-named, indexed argument: use the first positional option
            // that was not already set by name.
            while (ARGS_MAP_INDEXED[nextIndexed]
              && hasOwn.call(options, ARGS_MAP_INDEXED[nextIndexed].name)) {
                ++nextIndexed;
            }
            spec = ARGS_MAP_INDEXED[nextIndexed++];
            if (spec)
                options[spec.name] = token;
            continue;
        }

        if (spec.type == "boolean")
            options[spec.name] = true;
        else if (i + 1 < argv.length)
            options[spec.name] = argv[++i];
        // A value option at the very end has no value; it falls back to its
        // default below.
    }

    var missingRequired = false;
    ARG_NAMES.forEach(function(arg) {
        if (typeof options[arg] != "undefined")
            return;
        if (DEBUG)
            console.log("DEBUG:: unused arg: ", arg, ARGS_MAP[arg].def);
        if (ARGS_MAP[arg].req)
            missingRequired = true;
        if (typeof ARGS_MAP[arg].def != "undefined")
            options[arg] = ARGS_MAP[arg].def;
        else if (ARGS_MAP[arg].type == "boolean")
            options[arg] = false;
        else
            options[arg] = null;
    });
    if (missingRequired)
        return usage();
    return options;
}
// Running totals over all parsed output: number of matching lines and
// number of files that contain at least one match.
var count = 0;
var filecount = 0;

/**
 * Reformat a piece of "file:line:text" search output and write it to
 * stdout, grouped per file:
 *
 *   <file>:
 *       <line>: <text>
 *
 * Lines that have fewer than three ":"-separated fields, or whose second
 * field is not a non-zero number, are skipped. `count`, `filecount` and
 * PREVIOUS_FILE are updated as a side effect.
 *
 * @param {String} res raw output; anything that is not a string is treated
 *   as empty
 * @param {String} basePath search root; its first occurrence is removed from
 *   the file name and from every text field
 * @param {Object} options the parsed command line options (not used here)
 */
function parseSearchResult(res, basePath, options) {
    var text = typeof res == "string" ? res : "";
    var output = "";
    text.split(/([\n\r]+)/g).forEach(function(line) {
        var fields = line.split(":");
        if (fields.length < 3) {
            return;
        }
        var file = encodeURI(rtrim(fields[0].replace(basePath, "")));
        var lineno = parseInt(fields[1], 10);
        if (!lineno) {
            return;
        }
        count += 1;
        if (file !== PREVIOUS_FILE) {
            // First match in another file: start a new group.
            filecount += 1;
            output += (PREVIOUS_FILE ? "\n\n" : "") + file + ":";
            PREVIOUS_FILE = file;
        }
        // The matched text may itself contain ":"; glue it back together.
        var matched = fields.slice(2).map(function(field) {
            return field.replace(basePath, "");
        });
        output += "\n\t" + lineno + ": " + matched.join(":");
    });
    process.stdout.write(output);
}
/**
 * Wrap a value in single quotes for bash. Embedded single quotes are written
 * as '\'' (close the quote, add an escaped quote, reopen the quote).
 *
 * @param {*} str value to quote; it is converted to a string first
 * @returns {String} a single shell word
 */
function quoteShellArg(str) {
    return "'" + String(str).replace(/'/g, "'\\''") + "'";
}

/**
 * Build the shell command line that performs the search, or the
 * search-and-replace when `options.replaceAll` is set.
 *
 * @param {Object} options parsed command line options. Reads `query`
 *   (required), `path`, `casesensitive`, `pattern`, `maxresults`,
 *   `wholeword`, `regexp`, `replaceAll` and the replacement text from
 *   `replace` (or `replacement`, when a caller sets that instead).
 * @returns {String} the command, meant to be run with `bash -c`
 * @throws {TypeError} when `options.query` is missing
 */
function buildGrepCommand(options) {
    if (options.query === undefined || options.query === null)
        throw new TypeError("buildGrepCommand: options.query is required");
    var rawQuery = String(options.query);

    var cmd = GREP_CMD + " -s -r --color=never --binary-files=without-match -n "
        + (!options.casesensitive ? "-i " : "");

    // NOTE(review): `pattern` goes into the command line unquoted, because it
    // relies on the shell's brace expansion. It must not come from an
    // untrusted source.
    var include;
    if (options.pattern) { // handles grep peculiarities with --include
        include = options.pattern.split(",").length > 1
            ? "{" + options.pattern + "}"
            : options.pattern;
    } else {
        include = "\\*{" + PATTERN_EXT + "}";
    }

    if (options.maxresults) {
        var max = parseInt(options.maxresults, 10);
        // Leave the flag out instead of emitting "-m NaN".
        if (!isNaN(max))
            cmd += "-m " + max;
    }
    if (options.wholeword)
        cmd += " -w";

    // grep has no notion of new lines inside a pattern. For a literal search
    // the query is therefore split at every "\n" sequence, each piece is
    // escaped, and the pieces are joined again.
    // NOTE(review): the escaping in grepEscapeRegExp() looks like it targets
    // Perl/extended syntax, yet no -P/-E flag is passed here - confirm which
    // grep dialect is intended.
    var grepQuery = rawQuery;
    if (!options.replaceAll && !options.regexp) {
        grepQuery = rawQuery.split("\\n").map(function(piece) {
            return grepEscapeRegExp(piece);
        }).join("\\n");
    }

    // The path is single-quoted: inside double quotes bash keeps a backslash
    // in front of most characters (a space, for example), so backslash
    // escaping there would change the path.
    cmd += " --exclude=*{" + PATTERN_EDIR + "}*"
        + " --include=" + include
        + " " + quoteShellArg(grepQuery.replace(/-/g, "\\-"))
        + " " + quoteShellArg(options.path);

    if (options.replaceAll) {
        // The command line option is called `replace`; `replacement` is still
        // honoured for callers that set it directly.
        var replacement = options.replacement;
        if (replacement === undefined || replacement === null)
            replacement = typeof options.replace == "string" ? options.replace : "";
        replacement = String(replacement);

        // Build the perl expression from the raw query and quote the finished
        // script as a whole, so regex escaping and shell quoting cannot
        // interfere with each other.
        var perlPattern = rawQuery;
        if (!options.regexp) {
            // Literal mode: neutralise regex syntax, the "/" delimiter and
            // perl's "$" / "@" interpolation on both sides of s///.
            perlPattern = escapeRegExp(rawQuery).replace(/@/g, "\\@");
            replacement = replacement.replace(/[\\\/$@]/g, "\\$&");
        }

        // pipe the grep results (file names only, -l) into perl
        cmd += " -l | xargs " + PERL_CMD + " -pi -e " + quoteShellArg(
            // print the grep result to STDOUT (to arrange in parseSearchResult())
            "print STDOUT \"$ARGV:$.:$_\""
            // do the actual replace
            + " if s/" + perlPattern + "/" + replacement + "/mg"
            + (!options.casesensitive ? "i" : "") + ";"
        );
    }
    return cmd;
}
/**
 * Entry point: parse the command line, build the search command, run it
 * through bash and stream the reformatted results to stdout. A JSON-like
 * summary with the number of matches and files is printed at the end.
 */
function main() {
    var options = parseArgs();
    if (DEBUG)
        console.log("DEBUG:: options: ", options);
    var cmd = buildGrepCommand(options);
    if (DEBUG)
        console.log("DEBUG:: search command: " + cmd);

    function spawnFailed() {
        console.error("Could not spawn grep process");
        process.exit(1);
    }

    var child;
    try {
        child = Spawn("/bin/bash", ["-c", cmd]);
    } catch (e) {
        spawnFailed();
    }
    // A shell that cannot be started is reported through the 'error' event
    // rather than by an exception.
    child.on("error", spawnFailed);

    /**
     * Feed a stream to parseSearchResult() in whole lines. Chunk boundaries
     * are arbitrary, so anything after the last new line is held back until
     * the rest of that line arrives (or the stream ends).
     */
    function parseLines(stream) {
        var pending = "";
        stream.setEncoding("utf8");
        stream.on("data", function(data) {
            if (!data)
                return;
            pending += data;
            var lastBreak = pending.lastIndexOf("\n");
            if (lastBreak < 0)
                return;
            var complete = pending.slice(0, lastBreak + 1);
            pending = pending.slice(lastBreak + 1);
            if (DEBUG)
                console.log("DEBUG:: " + complete);
            parseSearchResult(complete, options.path, options);
        });
        stream.on("end", function() {
            // Output that did not end with a new line.
            if (pending)
                parseSearchResult(pending, options.path, options);
            pending = "";
        });
    }
    // Each stream gets its own buffer so their output cannot be interleaved
    // in the middle of a line.
    parseLines(child.stdout);
    parseLines(child.stderr);

    // 'close' fires once the process has ended AND its output streams are
    // closed, so the totals cover every result line.
    child.on("close", function(code, signal) {
        process.stdout.write("\nResults: {\"count\": " + count + ", \"filecount\":"
            + filecount + "}\n");
    });
}
main();
Usage: cs [-i|--casesensitive] [-m|--maxresults number] [-p|--pattern string] [-x|--regexp] [-r|--replace string] [-a|--replaceAll] [-w|--wholeword] query [path]
path specific path to use as the root of the search operation. Defaults to the current working dir [default=/Users/mikedeboer/Projects/jsDAV].
query string to search for. The needle. Put quotes around queries with multiple words or regular expressions [required]
-i/ --casesensitive whether to match strings sensitive to upper or lower case characters [default=false].
-m/ --maxresults the maximum number of search results to return [optional].
-p/ --pattern a comma-separated list of file patterns to include in the search [optional].
-x/ --regexp whether [query] should be interpreted as a regular expression [default=false].
-r/ --replace string to replace a match with [default=false].
-a/ --replaceAll during a replace action, replace all occurrences [default=false].
-w/ --wholeword only match strings that are delimited by non-word characters [default=false].
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment