Skip to content

Instantly share code, notes, and snippets.

@kaizhu256
Last active February 19, 2023 22:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaizhu256/8ec8f605ad2b99afff08a9969d3fbfe9 to your computer and use it in GitHub Desktop.
Save kaizhu256/8ec8f605ad2b99afff08a9969d3fbfe9 to your computer and use it in GitHub Desktop.
This gist file demos a performant, self-contained function "globExclude()", which batch-globs <pathnameList> in a single pass, with given filters <excludeList>, <includeList>.
/*jslint beta, node*/
// This gist file demos a performant, self-contained function "globExclude()",
// which batch-globs <pathnameList> in a single pass,
// with given filters <excludeList>, <includeList>.
//
// Could be useful if you need to glob thousands of files for test-coverage,
// or other purposes.
// Example usage
/*
Output from below code:
[
'.eslintrc.js',
'deps/extract.js',
'lib',
'lib/sqlite3-binding.js',
'lib/sqlite3.js',
'lib/trace.js',
'tools/benchmark/insert.js'
]
*/
console.log(
globExclude({
excludeList: [
"tes?/",
"tes[!0-9A-Z_a-z-]/",
"tes[0-9A-Z_a-z-]/",
"tes[^0-9A-Z_a-z-]/",
"test/**/*.js",
"test/suppor*/*elper.js",
"test/suppor?/?elper.js",
"test/support/helper.js"
],
includeList: [
"**/*.cjs",
"**/*.js",
"**/*.mjs",
"li*/*.js",
"li?/*.js",
"lib/",
"lib/**/*.js",
"lib/*.js",
"lib/sqlite3.js"
],
pathnameList: [
".",
".dockerignore",
".eslintrc.js",
".github",
".github/workflows",
".github/workflows/ci.yml",
".gitignore",
"LICENSE",
"README.md",
"binding.gyp",
"deps",
"deps/common-sqlite.gypi",
"deps/extract.js",
"lib",
"lib/sqlite3-binding.js",
"lib/sqlite3.js",
"lib/trace.js",
"package.json",
"src",
"src/async.h",
"src/backup.cc",
"src/backup.h",
"src/database.cc",
"src/database.h",
"src/gcc-preinclude.h",
"src/macros.h",
"src/node_sqlite3.cc",
"src/statement.cc",
"src/statement.h",
"src/threading.h",
"test",
"test/.eslintrc.js",
"test/affected.test.js",
"test/async_calls.test.js",
"test/backup.test.js",
"test/blob.test.js",
"test/cache.test.js",
"test/constants.test.js",
"test/database_fail.test.js",
"test/each.test.js",
"test/exec.test.js",
"tools",
"tools/benchmark",
"tools/benchmark/insert.js"
]
}).pathnameList
);
function globExclude({
excludeList = [],
includeList = [],
pathnameList = []
}) {
// This function will
// 1. Exclude pathnames in <pathnameList> that don't match glob-patterns in
// <includeList>.
// 2. Exclude pathnames in <pathnameList> that match glob-patterns in
// <excludeList>.
function globAssertNotWeird(list, name) {
// This function will check if <list> of strings contain weird characters.
[
[
"\n", (
/^.*?([\u0000-\u0007\r]).*/gm
)
],
[
"\r", (
/^.*?([\n]).*/gm
)
]
].forEach(function ([
separator, rgx
]) {
list.join(separator).replace(rgx, function (match0, char) {
throw new Error(
"Weird character "
+ JSON.stringify(char)
+ " found in " + name + " "
+ JSON.stringify(match0)
);
});
});
}
function globToRegexp(pattern) {
// This function will translate glob <pattern> to javascript-regexp,
// which javascript can then use to "glob" pathnames.
let ii = 0;
let isClass = false;
let strClass = "";
let strRegex = "";
pattern = pattern.replace((
/\/\/+/g
), "/");
pattern = pattern.replace((
/\*\*\*+/g
), "**");
pattern.replace((
/\\\\|\\\[|\\\]|\[|\]|./g
), function (match0) {
switch (match0) {
case "[":
if (isClass) {
strClass += "[";
return;
}
strClass += "\u0000";
strRegex += "\u0000";
isClass = true;
return;
case "]":
if (isClass) {
isClass = false;
return;
}
strRegex += "]";
return;
default:
if (isClass) {
strClass += match0;
return;
}
strRegex += match0;
}
return "";
});
strClass += "\u0000";
// An expression "[!...]" matches a single character, namely any character that
// is not matched by the expression obtained by removing the first '!' from it.
// (Thus, "[!a-]" matches any single character except 'a', and '-'.)
strClass = strClass.replace((
/\u0000!/g
), "\u0000^");
// One may include '-' in its literal meaning by making it the first or last
// character between the brackets.
strClass = strClass.replace((
/\u0000-/g
), "\u0000\\-");
strClass = strClass.replace((
/-\u0000/g
), "\\-\u0000");
// Escape brackets '[', ']' in character class.
strClass = strClass.replace((
/[\[\]]/g
), "\\$&");
// https://stackoverflow.com/questions/3561493
// /is-there-a-regexp-escape-function-in-javascript
// $()*+-./?[\]^{|}
strRegex = strRegex.replace((
// Ignore [-/].
/[$()*+.?\[\\\]\^{|}]/g
), "\\$&");
// Expand wildcard '**/*'.
strRegex = strRegex.replace((
/\\\*\\\*\/(?:\\\*)+/g
), ".*?");
// Expand wildcard '**'.
strRegex = strRegex.replace((
/(^|\/)\\\*\\\*(\/|$)/gm
), "$1.*?$2");
// Expand wildcard '*'.
strRegex = strRegex.replace((
/(?:\\\*)+/g
), "[^\\/]*?");
// Expand wildcard '?'.
strRegex = strRegex.replace((
/\\\?/g
), "[^\\/]");
// Expand directory-with-trailing-slash '.../'.
strRegex = strRegex.replace((
/\/$/gm
), "\\/.*?");
// Merge strClass into strRegex.
ii = 0;
strClass = strClass.split("\u0000");
strRegex = strRegex.replace((
/\u0000/g
), function () {
ii += 1;
if (strClass[ii] === "") {
return "";
}
return "[" + strClass[ii] + "]";
});
// Change strRegex from string to regexp.
strRegex = new RegExp("^" + strRegex + "$", "gm");
return strRegex;
}
// Validate excludeList, includeList, pathnameList.
globAssertNotWeird(excludeList, "pattern");
globAssertNotWeird(includeList, "pattern");
globAssertNotWeird(pathnameList, "pathname");
// Optimization
// Concat pathnames into a single, newline-separated string,
// whose pathnames can all be filtered with a single, regexp-pass.
pathnameList = pathnameList.join("\n");
// 1. Exclude pathnames in <pathnameList> that don't match glob-patterns in
// <includeList>.
if (includeList.length > 0) {
includeList = includeList.map(globToRegexp);
includeList.forEach(function (pattern) {
pathnameList = pathnameList.replace(pattern, "\u0000$&");
});
pathnameList = pathnameList.replace((
/^[^\u0000].*/gm
), "");
pathnameList = pathnameList.replace((
/^\u0000+/gm
), "");
}
// 2. Exclude pathnames in <pathnameList> that match glob-patterns in
// <excludeList>.
excludeList = excludeList.map(globToRegexp);
excludeList.forEach(function (pattern) {
pathnameList = pathnameList.replace(pattern, "");
});
// Split newline-separated pathnames back to list.
pathnameList = pathnameList.split("\n").filter(function (elem) {
return elem;
});
return {
excludeList,
includeList,
pathnameList
};
}
export {
globExclude
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment