Created February 21, 2010 13:45
Globbing implemented in Groovy
// Licensed under the "Apache License, Version 2.0" (c) 2010
/**
 * Returns filenames found by expanding the passed pattern which is a String or
 * a List of patterns.
 * NOTE: this pattern is not a regexp (it is closer to a shell glob).
 * NOTE: case sensitivity depends on your system.
 *
 * <code>*</code> Matches any file. Can be restricted by other values in
 * the glob pattern (same as <code>.*</code> in regexp).
 * <code>*</code> will match all files,
 * <code>c*</code> will match all files beginning with c,
 * <code>*c</code> will match all files ending with c.
 *
 * <code>**</code> Matches directories recursively.
 *
 * <code>?</code> Matches any one character. Equivalent to <code>.</code>
 * in a regular expression.
 *
 * <code>[set]</code> Matches any one character in set. Behaves like character
 * sets in regex, including negation (<code>[^a-z]</code>).
 *
 * <code>{p,q}</code> Matches either literal p or literal q. Matching literals
 * may be more than one character in length. More than two
 * literals may be specified. Same as alternation in regexp.
 *
 * NOTE: when matching special characters an escape is required, for example :
 * <code>"\\*"</code> or <code>"\\\\"</code>.
 *
 * NOTE: flags (e.g. case insensitive matching) are not supported.
 *
 * @see http://ruby-doc.org/core/classes/Dir.html
 * @see http://www.faqs.org/docs/abs/HTML/globbingref.html
 * @author Karol Bucek
 */
File.metaClass.'static'.glob = { pattern ->
    if ( pattern == null ) throw new IllegalArgumentException('null pattern')
    if ( pattern instanceof Collection
         || pattern instanceof Object[] ) {
        if ( pattern.size() == 0 ) return []
        return pattern.toList().sum({ File.glob(it) })
    }
    def base = '', path = pattern.tokenize('/')
    int i = -1, s = path.size()
    while ( ++i < s - 1 ) {
        // STOP on 'wild'-cards :
        // 1. * (equivalent to /.*/x in regexp)
        // 2. ? (equivalent to /.{1}/ in regexp)
        // 3. [set]
        // 4. {p,q}
        if ( path[i] ==~ /.*[^\\]?[\*|\?|\[|\]|\{|\}].*/ ) break
    }
    base = path[0..<i].join('/'); pattern = path[i..<s].join('/')
    // a char loop over the pattern - instead of a bunch of replace() calls :
    char c; boolean curling = false; // (c) Vancouver 2010 :)
    final Closure notEscaped = { j -> // TODO: handling 2 escapes is enough!
        if ( j == 0 || pattern.charAt(j-1) != '\\' ) return true
        return ( j > 1 && pattern.charAt(j-2) == '\\') // [j-1] was '\\'
    }
    StringBuilder pb = new StringBuilder()
    for (i=0; i<(s = pattern.length()); i++) {
        switch (c = pattern.charAt(i)) {
            case ['.', '$'] as char[] : // escape special chars
                pb.append('\\').append(c)
                break
            case '?' as char : // 2. ?
                if ( notEscaped(i) ) pb.append('.')
                else pb.append(c)
                break
            case '*' as char : // 1. * (or **)
                if ( notEscaped(i) ) {
                    if ( i==s-1 || pattern.charAt(i+1) != '*' ) pb.append('.*?')
                    else (pb.append('**') && i++) // skip next *
                }
                else pb.append(c)
                break
            case '{' as char : // 4. {a,bc} -> (a|bc)
                if ( notEscaped(i) ) { pb.append('('); curling = true }
                else pb.append(c)
                break
            case ',' as char : // 4. {a,bc} -> (a|bc)
                if ( notEscaped(i) && curling ) pb.append('|')
                else pb.append(c)
                break
            case '}' as char : // 4. {a,bc} -> (a|bc)
                if ( notEscaped(i) && curling ) { pb.append(')'); curling = false }
                else pb.append(c)
                break
            default : pb.append(c)
        }
    }
    // if the last char is not a wildcard match the end :
    if ( c != '?' && c != ')' && c != ']' ) pb.append('$')
    pattern = pb.toString()
    // meh - a nice one :
    // new File('').exists() != new File(new File('').absolutePath).exists()
    final File baseFile = new File(base).getAbsoluteFile() // base might be ''
    final List fnames = [] // the result - file names
    //println "base: $base pattern: $pattern"
    if ( baseFile.exists() ) { // do not throw a FileNotFoundException
        final List matchedDirs = [ baseFile ]
        if ( (path = pattern.tokenize('/')).size() > 1 ) {
            // list and yield all dirs of the given dir :
            final Closure listDirs = { dir, yield ->
                for ( File file : dir.listFiles() )
                    if ( file.isDirectory() ) yield.call(file, yield)
            }
            path[0..-2].each { subPattern ->
                final boolean global = (subPattern == '**')
                // match the dir, second param is the closure itself :
                final Closure matchDir = { dir, self ->
                    if ( global || dir.name ==~ subPattern ) {
                        matchedDirs.add(dir)
                    }
                    if ( global ) listDirs(dir, self) // recurse
                }
                File[] mdirs = matchedDirs.toArray(); matchedDirs.clear()
                for ( File mdir : mdirs ) {
                    if ( global ) matchedDirs.add(mdir)
                    listDirs( mdir, matchDir )
                }
            }
        }
        // we used the absolute path - thus might need to remove the 'prefix' :
        s = base ? baseFile.path.lastIndexOf(base) : (baseFile.path.length() + 1)
        // add the files matching in a given directory to the result :
        final Closure addMatchingFiles = { dir, p ->
            dir.list({ pdir, name ->
                if ( name ==~ p ) fnames << "${pdir.path}/$name".substring(s)
                return false // we do not care about the list() return value
            } as FilenameFilter)
        }
        for (i = 0; i<matchedDirs.size(); i++) {
            // we only need the match against the last "path"
            // aka the pattern was tokenized with '/' :
            addMatchingFiles(matchedDirs[i], path[-1])
        }
    }
    return fnames
}
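
Below is a minimal usage sketch, not part of the original gist: it assumes the snippet above is saved as glob.groovy and evaluated first, and the _demo directory tree and file names are made up for illustration.

// usage sketch (assumes the glob definition above is saved as glob.groovy) :
evaluate(new File('glob.groovy')) // registers the static File.glob method

new File('_demo/src/acme').mkdirs() // hypothetical sample tree
new File('_demo/src/acme/App.groovy').createNewFile()
new File('_demo/src/acme/Util.groovy').createNewFile()

assert File.glob('_demo/**/*.groovy').size() == 2           // ** matches dirs recursively
assert File.glob('_demo/src/*/?til.groovy').size() == 1     // * and ? wildcards
assert File.glob('_demo/**/{App,Util}.groovy').size() == 2  // {p,q} alternation
assert File.glob('no-such-dir/**/*') == []                  // missing base dir yields []

new File('_demo').deleteDir() // clean up the sample tree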
import groovy.util.GroovyTestCase
// "test" inspired by rubicon/test_dir.rb :
class FileGlobTest extends GroovyTestCase {
    void setUp() {
        deleteTestDir()
        try { new File('_test').mkdir() }
        catch(e) {
            System.out.println("Cannot run a file or directory test: " +
                "will destroy existing directory '_test'")
            System.exit(1)
        }
    }
    void tearDown() {
        deleteTestDir()
    }
    def deleteTestDir() {
        def testDir = new File('_test')
        if (testDir.exists()) testDir.deleteDir()
    }
    def createTest1FS() {
        new File("_test/_file1").createNewFile()
        new File("_test/_file2").createNewFile()
    }
    void testBasicMatching() {
        createTest1FS()
        [
            [ [ "_test" ], "_test" ],
            [ [ "_test" ], /* "_test/" in ruby */ "_test/" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file*" ],
            [ [], "_test/frog*" ],
            [ [ "_test/_file1" ], "_test/_file1" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_file*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/**/_file*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file[0-9]*" ],
            [ [], "_test/_file[a-z]*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file{0,1,2,3}" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file{0|1|2|3}" ],
            [ [], "_test/_file{4,5,6,7}" ],
            [ [], "_test/_file{4|5|6|7}" ],
            [ [ "_test/_file2" ], "_test/_file{0,2,4,6}" ],
            [ [ "_test/_file2" ], "_test/_file{0|2|4|6}" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_f*[il]l*" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_f*[il]e[0-9]" ],
            [ [ "_test/_file1" ], "**/_f*[il]e[01]" ],
            [ [ "_test/_file1" ], "**/_f*[il]e[01]*" ],
            [ [ "_test/_file1" ], "**/_f*[^ie]e[01]*" ],
            [ [], "**/_file[^0-9]" ],
            [ [ "_test/_file2" ], "**/_file[^0-1]" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }
    void testMatchingNonExistingNames() {
        createTest1FS()
        [
            [ [], "-test" ],
            [ [], "-test/*" ],
            [ [], "_tes" ],
            [ [], "_tes/_file1" ],
            [ [], "_ test" ],
            [ [], "_ test/_file1" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }
    def createTest2FS() {
        new File("_test/1").mkdirs()
        new File("_test/2/d3").mkdirs()
        new File("_test/1/f1.txt").createNewFile()
        new File("_test/2/f1").createNewFile()
        new File("_test/2/f2.log").createNewFile()
        new File("_test/2/d3/f3.txt").createNewFile()
        new File("_test/2/d3/f33.txt").createNewFile()
    }
    void testGlobalMatching() {
        createTest2FS()
        [
            [ [ "_test/1/f1.txt", "_test/2/d3/f3.txt",
                "_test/2/d3/f33.txt" ], "**/*.txt" ],
            [ [ "_test/1", "_test/2/f1" ], "**/*1" ],
            [ [ "_test/1/f1.txt" ], "**/*1.txt" ],
            [ [ "_test/1/f1.txt", "_test/2/d3/f3.txt" ], "**/f?.txt" ],
            [ [ "_test/2/f2.log", "_test/2/d3/f3.txt" ], "_test/2/**/f?.*" ],
            [ [ "_test/1", "_test/2", "_test/2/d3" ], "_test/**/[^f]*" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }
    void testNestedMatching() {
        createTest2FS()
        [
            [ [ "_test/2/f2.log", "_test/2/d3" ], "_test/{1,2}/{f,[a-d],x,y}{[2-3]}*" ],
            [ [ "_test/2/d3/f3.txt", "_test/2/d3/f33.txt" ], "_test/{1,?}/**/{[^a-c][^0-2],z}*.txt" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }
    def createTest3FS() {
        new File("_test/*").mkdirs()
        new File("_test/\$/.2").mkdirs()
        new File("_test/\$/*").createNewFile()
        new File("_test/*/?").createNewFile()
        new File("_test/*/?.\$").createNewFile()
        new File("_test/\$/.2/{1,2}").createNewFile()
        new File("_test/\$/[a-c]").createNewFile()
        new File("_test/\$/a,").createNewFile()
        new File("_test/\$/b\\").createNewFile()
        new File("_test/\$/c^").createNewFile()
    }
    void testMatchingMetaChars() {
        createTest3FS()
        [
            [ [ "_test/*/?" ], "**/*\\?" ],
            [ [ "_test/*/?", "_test/*/?.\$" ], "**/\\?*" ],
            [ [ "_test/\$/.2", "_test/*/?.\$" ], "**/*.?" ],
            [ [ "_test/\$/[a-c]", "_test/\$/a," ], "_test/*/*a*" ],
            [ [ "_test/\$/[a-c]" ], "_test/*/*{\\],\\}}" ],
            [ [ "_test/\$/[a-c]", "_test/\$/.2/{1,2}" ], "_test/**/*{\\],\\}}" ],
            [ [ "_test/\$", "_test/*/?.\$" ], "_test/**/*\$" ],
            [ [ "_test/\$/.2/{1,2}" ], "**/.?/*" ],
            [ [ "_test/\$/b\\" ], "**/\$/?\\\\" ],
            [ [ "_test/\$/b\\" ], "**/\$/**/*\\\\" ],
            [ [ "_test/\$/b\\", "_test/\$/c^" ], "**/?{\\\\,\\^}" ],
            [ [ "_test/\$/a,", "_test/\$/b\\",
                "_test/\$/c^" ], "**/[a-c]{\\,|\\\\|\\^}" ],
        ].each { expected, globStr ->
            println "test expected: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }
    void testAcceptsListOrArrayAsFilter() {
        new File("_test/_dir").mkdirs()
        new File("_test/_dir/_file1").createNewFile()
        new File("_test/_dir/_file2").createNewFile()
        def expected = [ '_test/_dir/_file1', '_test/_dir/_file2' ]
        assertBagEqual(expected, File.glob(['_test/**/*1', '_test/_dir/?file2']))
        assertBagEqual(expected, File.glob(['**/_file1', '**/_dir/_*2'] as Object[]))
        assertBagEqual([], File.glob([]))
    }
    void testEmptyStringMatchesNothing() {
        new File("_test/_dir").mkdirs()
        new File("_test/_file").createNewFile()
        new File("_test/_dir/_file").createNewFile()
        assertBagEqual([], File.glob(''))
    }
    def assertBagEqual(expected, actual) {
        def expectedLeft = expected.clone()
        actual.each { item -> expectedLeft.remove(item) }
        def msg = "expected: ${expected}, actual: ${actual}"
        assert expected.size() == actual.size() && expectedLeft.isEmpty() : msg
    }
}
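
One possible way to run these tests is sketched below; the file names glob.groovy and FileGlobTest.groovy are assumptions, not part of the gist. The glob definition must be evaluated first so the File.glob metaClass method is registered before the test case is loaded and handed to JUnit's text runner.

// hypothetical runner script - file names are assumptions :
evaluate(new File('glob.groovy')) // registers File.glob on the File metaClass
def loader = new GroovyClassLoader(this.class.classLoader)
def testClass = loader.parseClass(new File('FileGlobTest.groovy'))
junit.textui.TestRunner.run(testClass) // JUnit 3 text runner bundled with Groovy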