Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@kares
Created February 21, 2010 13:45
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kares/310321 to your computer and use it in GitHub Desktop.
Save kares/310321 to your computer and use it in GitHub Desktop.
Globbing implemented in Groovy
// Licensed under the "Apache License, Version 2.0" (c) 2010
/**
* Returns filenames found by expanding the passed pattern which is String or
* a List of patterns.
* NOTE: this pattern is not a regexp (it's closer to a shell glob).
* NOTE: case sensitivity depends on your system.
*
* <code>*</code> Matches any file. Can be restricted by other values in
* the glob pattern (same as <code>.*</code> in regexp).
* <code>*</code> will match all files,
* <code>c*</code> will match all files beginning with c,
* <code>*c</code> will match all files ending with c.
*
* <code>**</code> Matches directories recursively.
*
* <code>?</code> Matches any one character. Equivalent to <code>.</code>
* in a regular expression.
*
* <code>[set]</code> Matches any one character in set. Behaves like character
* sets in regex, including negation (<code>[^a-z]</code>).
*
* <code>{p,q}</code> Matches either literal p or literal q. Matching literals
* may be more than one character in length. More than two
* literals may be specified. Same as alternation in regexp.
*
* NOTE: when matching special characters an escape is required, for example :
* <code>"\\*"</code> or <code>"\\\\"</code>.
*
* NOTE: flags (e.g. case insensitive matching) are not supported.
*
* @see http://ruby-doc.org/core/classes/Dir.html
* @see http://www.faqs.org/docs/abs/HTML/globbingref.html
* @author Karol Bucek
*/
// Installs a static File.glob(pattern) through the metaclass. The closure
// takes a glob String (or a Collection / Object[] of them) and returns a List
// of path strings for the entries whose names match.
// Throws IllegalArgumentException when the pattern is null.
File.metaClass.'static'.glob = { pattern ->
if ( pattern == null ) throw new IllegalArgumentException('null pattern')
// several patterns: glob each one separately and concatenate the result lists
if ( pattern instanceof Collection
|| pattern instanceof Object[] ) {
if ( pattern.size() == 0 ) return []
return pattern.toList().sum({ File.glob(it) })
}
// split the pattern into its '/'-separated segments
// NOTE(review): tokenize() yields no empty tokens, so the leading '/' of an
// absolute pattern such as '/tmp/*' looks to be lost and the pattern is then
// resolved against the working directory - confirm
def base = '', path = pattern.tokenize('/')
int i = -1, s = path.size()
// advance i to the first segment holding a wildcard character; the loop never
// goes past the last segment, so that one always stays part of the pattern
while ( ++i < s - 1 ) {
// STOP on 'wild'-cards :
// 1. * (equivalent to /.*/x in regexp)
// 2. ? (equivalent to /.{1}/ in regexp)
// 3. [set]
// 4. {p,q}
// the [^\\]? part is optional, hence an escaped wildcard stops the scan too
// NOTE(review): '|' sits inside the character class and so counts as a
// stop character as well - presumably unintended, confirm
if ( path[i] ==~ /.*[^\\]?[\*|\?|\[|\]|\{|\}].*/ ) break
}
// base = the leading literal directories, pattern = the remaining segments
base = path[0..<i].join('/'); pattern = path[i..<s].join('/')
// a char loop over the pattern - instead of a bunch of replace() calls :
// (curling is true while inside an unescaped '{' ... '}' group)
char c; boolean curling = false; // (c) Vancouver 2010 :)
// true when the char at index j is not escaped : either no backslash precedes
// it, or the preceding backslash is itself preceded by a backslash
final Closure notEscaped = { j -> // todo handling 2 escapes is enough !
if ( j == 0 || pattern.charAt(j-1) != '\\' ) return true
return ( j > 1 && pattern.charAt(j-2) == '\\') // [j-1] was '\\'
}
// pb collects the regular expression built from the glob pattern
StringBuilder pb = new StringBuilder()
// note: i and s are re-used here as char index and pattern length
for (i=0; i<(s = pattern.length()); i++) {
switch (c = pattern.charAt(i)) {
case ['.', '$'] as char[] : // escape special chars
pb.append('\\').append(c)
break
case '?' as char : // 2. ?
if ( notEscaped(i) ) pb.append('.')
else pb.append(c)
break
case '*' as char : // 1. * (or **)
if ( notEscaped(i) ) {
// a single * becomes the reluctant '.*?' while a '**' pair is copied as-is,
// it is recognised as a whole directory segment further below
// NOTE(review): a '**' inside the last segment would reach the regexp match
// unconverted - confirm how that case is meant to behave
if ( i==s-1 || pattern.charAt(i+1) != '*' ) pb.append('.*?')
else (pb.append('**') && i++) // skip next *
}
else pb.append(c)
break
case '{' as char : // 4. {a,bc} -> (a|bc)
if ( notEscaped(i) ) { pb.append('('); curling = true }
else pb.append(c)
break
case ',' as char : // 4. {a,bc} -> (a|bc)
if ( notEscaped(i) && curling ) pb.append('|')
else pb.append(c)
break
case '}' as char : // 4. {a,bc} -> (a|bc)
if ( notEscaped(i) && curling ) { pb.append(')'); curling = false }
else pb.append(c)
break
// everything else (including '[', ']' and backslashes) is copied unchanged
default : pb.append(c)
}
}
// if the last char is not a wildcard match the end :
// (c still holds the last pattern char that was read by the loop above)
if ( c != '?' && c != ')' && c != ']' ) pb.append('$')
pattern = pb.toString()
// meh - a nice one :
// new File('').exists() != new File(new File('').absolutePath).exists()
final File baseFile = new File(base).getAbsoluteFile() // base might be ''
final List fnames = [] // the result - file names
//println "base: $base pattern: $pattern"
if ( baseFile.exists() ) { // do not throw a FileNotFoundException
// the directories whose entries get matched against the last segment
final List matchedDirs = [ baseFile ]
// path now holds the segments of the generated regexp pattern
if ( (path = pattern.tokenize('/')).size() > 1 ) {
// list and yield all dirs of the given dir :
final Closure listDirs = { dir, yield ->
for ( File file : dir.listFiles() )
if ( file.isDirectory() ) yield.call(file, yield)
}
// every segment but the last one narrows (or for '**' widens) matchedDirs
path[0..-2].each { subPattern ->
final boolean global = (subPattern == '**')
// match the dir, second param is the closure itself :
final Closure matchDir = { dir, self ->
if ( global || dir.name ==~ subPattern ) {
matchedDirs.add(dir)
}
if ( global ) listDirs(dir, self) // recurse
}
// snapshot the current candidates, matchedDirs is rebuilt for this segment
File[] mdirs = matchedDirs.toArray(); matchedDirs.clear()
for ( File mdir : mdirs ) {
// '**' keeps the directory itself as a candidate besides its descendants
if ( global ) matchedDirs.add(mdir)
listDirs( mdir, matchDir )
}
}
}
// we used the absolute path - thus might need to remove the 'prefix' :
// (s is re-used once more : the index from which a result path is kept)
// NOTE(review): base is joined with '/' - on a platform with another
// separator lastIndexOf(base) could return -1 for a nested base, confirm
s = base ? baseFile.path.lastIndexOf(base) : (baseFile.path.length() + 1)
// add the files matching in a given directory to the result :
final Closure addMatchingFiles = { dir, p ->
// the filter is only used for its side effect of collecting the names
dir.list({ pdir, name ->
if ( name ==~ p ) fnames << "${pdir.path}/$name".substring(s)
return false // we do not care about the list() return value
} as FilenameFilter)
}
for (i = 0; i<matchedDirs.size(); i++) {
// we only need the match against the last "path"
// aka the pattern was tokenized with '/' :
addMatchingFiles(matchedDirs[i], path[-1])
}
}
return fnames
}
import groovy.util.GroovyTestCase
// "test" inspired by rubicon/test_dir.rb :
/**
 * Tests for the static File.glob method.
 *
 * Every test builds a small file tree below the TEST_DIR directory, which is
 * resolved against the current working directory, and compares the paths
 * returned by File.glob with the expected ones, ignoring their order.
 * The directory is deleted before and after each test.
 */
class FileGlobTest extends GroovyTestCase {

    /**
     * Name of the scratch directory used by the tests.
     * The fixture paths and glob patterns below spell the same name out
     * literally, so it can not be changed here alone.
     */
    static final String TEST_DIR = '_test'

    /** Starts every test from a freshly created, empty test directory. */
    void setUp() {
        deleteTestDir()
        def testDir = new File(TEST_DIR)
        try { testDir.mkdir() }
        catch(e) {
            System.out.println("Cannot run a file or directory test: " +
                "will destroy existing directory '$TEST_DIR'")
            System.exit(1)
        }
        // mkdir() signals a failure by returning false instead of throwing,
        // make sure there really is a directory to work in :
        if ( ! testDir.isDirectory() ) {
            fail("could not create the test directory '$TEST_DIR'")
        }
    }

    /** Removes the test directory together with everything a test created. */
    void tearDown() {
        deleteTestDir()
    }

    /** Recursively deletes the test directory when it exists. */
    def deleteTestDir() {
        def testDir = new File(TEST_DIR)
        if (testDir.exists()) testDir.deleteDir()
    }

    /** Fixture 1 : two plain files directly below the test directory. */
    def createTest1FS() {
        new File("_test/_file1").createNewFile()
        new File("_test/_file2").createNewFile()
    }

    /** Literal names, '*', recursive matching, [set] and {p,q} patterns. */
    void testBasicMatching() {
        createTest1FS()
        [
            [ [ "_test" ], "_test" ],
            [ [ "_test" ], /* "_test/" in ruby */ "_test/" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file*" ],
            [ [], "_test/frog*" ],
            [ [ "_test/_file1" ], "_test/_file1" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_file*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/**/_file*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file[0-9]*" ],
            [ [], "_test/_file[a-z]*" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file{0,1,2,3}" ],
            [ [ "_test/_file1", "_test/_file2" ], "_test/_file{0|1|2|3}" ],
            [ [], "_test/_file{4,5,6,7}" ],
            [ [], "_test/_file{4|5|6|7}" ],
            [ [ "_test/_file2" ], "_test/_file{0,2,4,6}" ],
            [ [ "_test/_file2" ], "_test/_file{0|2|4|6}" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_f*[il]l*" ],
            [ [ "_test/_file1", "_test/_file2" ], "**/_f*[il]e[0-9]" ],
            [ [ "_test/_file1" ], "**/_f*[il]e[01]" ],
            [ [ "_test/_file1" ], "**/_f*[il]e[01]*" ],
            [ [ "_test/_file1" ], "**/_f*[^ie]e[01]*" ],
            [ [], "**/_file[^0-9]" ],
            [ [ "_test/_file2" ], "**/_file[^0-1]" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }

    /** Patterns naming files or directories that do not exist match nothing. */
    void testMatchingNonExistingNames() {
        createTest1FS()
        [
            [ [], "-test" ],
            [ [], "-test/*" ],
            [ [], "_tes" ],
            [ [], "_tes/_file1" ],
            [ [], "_ test" ],
            [ [], "_ test/_file1" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }

    /** Fixture 2 : nested directories holding a few files. */
    def createTest2FS() {
        new File("_test/1").mkdirs()
        new File("_test/2/d3").mkdirs()
        new File("_test/1/f1.txt").createNewFile()
        new File("_test/2/f1").createNewFile()
        new File("_test/2/f2.log").createNewFile()
        new File("_test/2/d3/f3.txt").createNewFile()
        new File("_test/2/d3/f33.txt").createNewFile()
    }

    /** Recursive directory matching over the nested fixture. */
    void testGlobalMatching() {
        createTest2FS()
        [
            [ [ "_test/1/f1.txt", "_test/2/d3/f3.txt",
                "_test/2/d3/f33.txt" ], "**/*.txt" ],
            [ [ "_test/1", "_test/2/f1" ], "**/*1" ],
            [ [ "_test/1/f1.txt" ], "**/*1.txt" ],
            [ [ "_test/1/f1.txt", "_test/2/d3/f3.txt" ], "**/f?.txt" ],
            [ [ "_test/2/f2.log", "_test/2/d3/f3.txt" ], "_test/2/**/f?.*" ],
            [ [ "_test/1", "_test/2", "_test/2/d3" ], "_test/**/[^f]*" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }

    /** Sets nested inside {p,q} groups, combined with the other wildcards. */
    void testNestedMatching() {
        createTest2FS()
        [
            [ [ "_test/2/f2.log", "_test/2/d3" ], "_test/{1,2}/{f,[a-d],x,y}{[2-3]}*" ],
            [ [ "_test/2/d3/f3.txt", "_test/2/d3/f33.txt" ], "_test/{1,?}/**/{[^a-c][^0-2],z}*.txt" ],
        ].each { expected, globStr ->
            println "test expects: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }

    /** Fixture 3 : names made of glob and regexp meta characters. */
    def createTest3FS() {
        new File("_test/*").mkdirs()
        new File("_test/\$/.2").mkdirs()
        new File("_test/\$/*").createNewFile()
        new File("_test/*/?").createNewFile()
        new File("_test/*/?.\$").createNewFile()
        new File("_test/\$/.2/{1,2}").createNewFile()
        new File("_test/\$/[a-c]").createNewFile()
        new File("_test/\$/a,").createNewFile()
        new File("_test/\$/b\\").createNewFile()
        new File("_test/\$/c^").createNewFile()
    }

    /** Escaped meta characters have to match the literal characters. */
    void testMatchingMetaChars() {
        createTest3FS()
        [
            [ [ "_test/*/?" ], "**/*\\?" ],
            [ [ "_test/*/?", "_test/*/?.\$" ], "**/\\?*" ],
            [ [ "_test/\$/.2", "_test/*/?.\$" ], "**/*.?" ],
            [ [ "_test/\$/[a-c]", "_test/\$/a," ], "_test/*/*a*" ],
            [ [ "_test/\$/[a-c]" ], "_test/*/*{\\],\\}}" ],
            [ [ "_test/\$/[a-c]", "_test/\$/.2/{1,2}" ], "_test/**/*{\\],\\}}" ],
            [ [ "_test/\$", "_test/*/?.\$" ], "_test/**/*\$" ],
            [ [ "_test/\$/.2/{1,2}" ], "**/.?/*" ],
            [ [ "_test/\$/b\\" ], "**/\$/?\\\\" ],
            [ [ "_test/\$/b\\" ], "**/\$/**/*\\\\" ],
            [ [ "_test/\$/b\\", "_test/\$/c^" ], "**/?{\\\\,\\^}" ],
            [ [ "_test/\$/a,", "_test/\$/b\\",
                "_test/\$/c^" ], "**/[a-c]{\\,|\\\\|\\^}" ],
        ].each { expected, globStr ->
            println "test expected: $expected out of File.glob('$globStr')"
            assertBagEqual(expected, File.glob(globStr))
        }
    }

    /** A List or an Object[] of patterns yields the combined matches. */
    void testAcceptsListOrArrayAsFilter() {
        new File("_test/_dir").mkdirs()
        new File("_test/_dir/_file1").createNewFile()
        new File("_test/_dir/_file2").createNewFile()
        def expected = [ '_test/_dir/_file1', '_test/_dir/_file2' ]
        assertBagEqual(expected, File.glob(['_test/**/*1', '_test/_dir/?file2']))
        assertBagEqual(expected, File.glob(['**/_file1', '**/_dir/_*2'] as Object[]))
        assertBagEqual([], File.glob([]))
    }

    /** The empty pattern matches nothing, even though files do exist. */
    void testEmptyStringMatchesNothing() {
        new File("_test/_dir").mkdirs()
        new File("_test/_file").createNewFile()
        new File("_test/_dir/_file").createNewFile()
        assertBagEqual([], File.glob(''))
    }

    /**
     * Asserts that both lists hold the same elements regardless of order :
     * the sizes have to be equal and removing every actual item from a copy
     * of the expected list has to leave that copy empty.
     */
    def assertBagEqual(expected, actual) {
        def expectedLeft = expected.clone()
        actual.each { item -> expectedLeft.remove(item) }
        def msg = "expected: ${expected}, actual: ${actual}"
        assert expected.size() == actual.size() && expectedLeft.isEmpty() : msg
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment