Skip to content

Instantly share code, notes, and snippets.

@lilyball
Created January 9, 2010 04:16
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lilyball/272720 to your computer and use it in GitHub Desktop.
Save lilyball/272720 to your computer and use it in GitHub Desktop.
fnmatch implementation for Go
// Provide string-matching based on fnmatch.3
package fnmatch
// There are a few issues that I believe to be bugs, but this implementation is
// based as closely as possible on BSD fnmatch. These bugs are present in the
// source of BSD fnmatch, and so are replicated here. The issues are as follows:
//
// * FNM_PERIOD is no longer observed after the first * in a pattern
// This only applies to matches done with FNM_PATHNAME as well
// * FNM_PERIOD doesn't apply to ranges. According to the documentation,
// a period must be matched explicitly, but a range will match it too
import (
"unicode"
"utf8"
)
const (
FNM_NOESCAPE = (1 << iota)
FNM_PATHNAME
FNM_PERIOD
FNM_LEADING_DIR
FNM_CASEFOLD
FNM_IGNORECASE = FNM_CASEFOLD
FNM_FILE_NAME = FNM_PATHNAME
)
func unpackRune(str *string) int {
rune, size := utf8.DecodeRuneInString(*str)
*str = (*str)[size:]
return rune
}
// Matches the pattern against the string, with the given flags,
// and returns true if the match is successful.
// This function should match fnmatch.3 as closely as possible.
func Match(pattern, s string, flags int) bool {
// The implementation for this function was patterned after the BSD fnmatch.c
// source found at http://src.gnu-darwin.org/src/contrib/csup/fnmatch.c.html
noescape := (flags&FNM_NOESCAPE != 0)
pathname := (flags&FNM_PATHNAME != 0)
period := (flags&FNM_PERIOD != 0)
leadingdir := (flags&FNM_LEADING_DIR != 0)
casefold := (flags&FNM_CASEFOLD != 0)
// the following is some bookkeeping that the original fnmatch.c implementation did not do
// We are forced to do this because we're not keeping indexes into C strings but rather
// processing utf8-encoded strings. Use a custom unpacker to maintain our state for us
sAtStart := true
sLastAtStart := true
sLastSlash := false
sLastUnpacked := 0
unpackS := func() int {
sLastSlash = (sLastUnpacked == '/')
sLastUnpacked = unpackRune(&s)
sLastAtStart = sAtStart
sAtStart = false
return sLastUnpacked
}
for len(pattern) > 0 {
c := unpackRune(&pattern)
switch c {
case '?':
if len(s) == 0 {
return false
}
sc := unpackS()
if pathname && sc == '/' {
return false
}
if period && sc == '.' && (sLastAtStart || (pathname && sLastSlash)) {
return false
}
case '*':
// collapse multiple *'s
// don't use unpackRune here, the only char we care to detect is ASCII
for len(pattern) > 0 && pattern[0] == '*' {
pattern = pattern[1:]
}
if period && s[0] == '.' && (sAtStart || (pathname && sLastUnpacked == '/')) {
return false
}
// optimize for patterns with * at end or before /
if len(pattern) == 0 {
if pathname {
return leadingdir || (strchr(s, '/') == -1)
} else {
return true
}
return !(pathname && strchr(s, '/') >= 0)
} else if pathname && pattern[0] == '/' {
offset := strchr(s, '/')
if offset == -1 {
return false
} else {
// we already know our pattern and string have a /, skip past it
s = s[offset:] // use unpackS here to maintain our bookkeeping state
unpackS()
pattern = pattern[1:] // we know / is one byte long
break
}
}
// general case, recurse
for test := s; len(test) > 0; unpackRune(&test) {
// I believe the (flags &^ FNM_PERIOD) is a bug when FNM_PATHNAME is specified
// but this follows exactly from how fnmatch.c implements it
if Match(pattern, test, (flags &^ FNM_PERIOD)) {
return true
} else if pathname && test[0] == '/' {
break
}
}
return false
case '[':
if len(s) == 0 {
return false
}
if pathname && s[0] == '/' {
return false
}
sc := unpackS()
if !rangematch(&pattern, sc, flags) {
return false
}
case '\\':
if !noescape {
if len(pattern) > 0 {
c = unpackRune(&pattern)
}
}
fallthrough
default:
if len(s) == 0 {
return false
}
sc := unpackS()
switch {
case sc == c:
case casefold && unicode.ToLower(sc) == unicode.ToLower(c):
default:
return false
}
}
}
return len(s) == 0 || (leadingdir && s[0] == '/')
}
func rangematch(pattern *string, test int, flags int) bool {
if len(*pattern) == 0 {
return false
}
casefold := (flags&FNM_CASEFOLD != 0)
noescape := (flags&FNM_NOESCAPE != 0)
if casefold {
test = unicode.ToLower(test)
}
var negate, matched bool
if (*pattern)[0] == '^' || (*pattern)[0] == '!' {
negate = true
(*pattern) = (*pattern)[1:]
}
for !matched && len(*pattern) > 1 && (*pattern)[0] != ']' {
c := unpackRune(pattern)
if !noescape && c == '\\' {
if len(*pattern) > 1 {
c = unpackRune(pattern)
} else {
return false
}
}
if casefold {
c = unicode.ToLower(c)
}
if (*pattern)[0] == '-' && len(*pattern) > 1 && (*pattern)[1] != ']' {
unpackRune(pattern) // skip the -
c2 := unpackRune(pattern)
if !noescape && c2 == '\\' {
if len(*pattern) > 0 {
c2 = unpackRune(pattern)
} else {
return false
}
}
if casefold {
c2 = unicode.ToLower(c2)
}
// this really should be more intelligent, but it looks like
// fnmatch.c does simple int comparisons, therefore we will as well
if c <= test && test <= c2 {
matched = true
}
} else if c == test {
matched = true
}
}
// skip past the rest of the pattern
ok := false
for !ok && len(*pattern) > 0 {
c := unpackRune(pattern)
if c == '\\' && len(*pattern) > 0 {
unpackRune(pattern)
} else if c == ']' {
ok = true
}
}
return ok && matched != negate
}
// define strchr because strings.Index() seems a bit overkill
// returns the index of c in s, or -1 if there is no match
func strchr(s string, c int) int {
for i, sc := range s {
if sc == c {
return i
}
}
return -1
}
package fnmatch
import (
"gospec"
"testing"
)
func TestAllSpecs(t *testing.T) {
r := gospec.NewRunner()
r.AddSpec("MatchSpec", MatchSpec)
gospec.MainGoTest(r, t)
}
// This is a set of tests ported from a set of tests for C fnmatch
// found at http://www.mail-archive.com/bug-gnulib@gnu.org/msg14048.html
func TestMatch(t *testing.T) {
assert := func(p, s string) {
if !Match(p, s, 0) {
t.Errorf("Assertion failed: Match(%#v, %#v, 0)", p, s)
}
}
assert("", "")
assert("*", "")
assert("*", "foo")
assert("*", "bar")
assert("*", "*")
assert("**", "f")
assert("**", "foo.txt")
assert("*.*", "foo.txt")
assert("foo*.txt", "foobar.txt")
assert("foo.txt", "foo.txt")
assert("foo\\.txt", "foo.txt")
if Match("foo\\.txt", "foo.txt", FNM_NOESCAPE) {
t.Errorf("Assertion failed: Match(%#v, %#v, FNM_NOESCAPE) == false", "foo\\.txt", "foo.txt")
}
}
func MatchSpec(c *gospec.Context) {
c.Specify("A wildcard pattern \"*\" should match anything", func() {
p := "*"
c.Then(Match(p, "", 0)).Should.Equal(true)
c.Then(Match(p, "foo", 0)).Should.Equal(true)
c.Then(Match(p, "*", 0)).Should.Equal(true)
c.Then(Match(p, " ", 0)).Should.Equal(true)
c.Then(Match(p, ".foo", 0)).Should.Equal(true)
c.Then(Match(p, "わたし", 0)).Should.Equal(true)
testSlash := func(flags int, result bool) {
c.Then(Match(p, "foo/bar", flags)).Should.Equal(result)
c.Then(Match(p, "/", flags)).Should.Equal(result)
c.Then(Match(p, "/foo", flags)).Should.Equal(result)
c.Then(Match(p, "foo/", flags)).Should.Equal(result)
}
c.Specify("Including / when flags is 0", func() { testSlash(0, true) })
c.Specify("Except / when used with FNM_PATHNAME", func() { testSlash(FNM_PATHNAME, false) })
c.Specify("Except . in some circumstances when used with FNM_PERIOD", func() {
c.Then(Match("*", ".foo", FNM_PERIOD)).Should.Equal(false)
c.Then(Match("/*", "/.foo", FNM_PERIOD)).Should.Equal(true)
c.Then(Match("/*", "/.foo", FNM_PERIOD|FNM_PATHNAME)).Should.Equal(false)
})
})
c.Specify("A question mark pattern \"?\" should match a single character", func() {
p := "?"
c.Then(Match(p, "", 0)).Should.Equal(false)
c.Then(Match(p, "f", 0)).Should.Equal(true)
c.Then(Match(p, ".", 0)).Should.Equal(true)
c.Then(Match(p, "?", 0)).Should.Equal(true)
c.Then(Match(p, "foo", 0)).Should.Equal(false)
c.Then(Match(p, "わ", 0)).Should.Equal(true)
c.Then(Match(p, "わた", 0)).Should.Equal(false)
c.Specify("Even / when flags is 0", func() { c.Then(Match(p, "/", 0)).Should.Equal(true) })
c.Specify("Except / when flags is FNM_PATHNAME", func() { c.Then(Match(p, "/", FNM_PATHNAME)).Should.Equal(false) })
c.Specify("Except . sometimes when FNM_PERIOD is given", func() {
c.Then(Match("?", ".", FNM_PERIOD)).Should.Equal(false)
c.Then(Match("foo?", "foo.", FNM_PERIOD)).Should.Equal(true)
c.Then(Match("/?", "/.", FNM_PERIOD)).Should.Equal(true)
c.Then(Match("/?", "/.", FNM_PERIOD|FNM_PATHNAME)).Should.Equal(false)
})
})
c.Specify("A range pattern", func() {
p := "[a-z]"
c.Specify("Should match a single character inside its range", func() {
c.Then(Match(p, "a", 0)).Should.Equal(true)
c.Then(Match(p, "q", 0)).Should.Equal(true)
c.Then(Match(p, "z", 0)).Should.Equal(true)
c.Then(Match("[わ]", "わ", 0)).Should.Equal(true)
})
c.Specify("Should not match characters outside its range", func() {
c.Then(Match(p, "-", 0)).Should.Equal(false)
c.Then(Match(p, " ", 0)).Should.Equal(false)
c.Then(Match(p, "D", 0)).Should.Equal(false)
c.Then(Match(p, "é", 0)).Should.Equal(false)
})
c.Specify("Should only match one character", func() {
c.Then(Match(p, "ab", 0)).Should.Equal(false)
c.Then(Match(p, "", 0)).Should.Equal(false)
})
c.Specify("Should not consume more of the pattern than necessary", func() { c.Then(Match(p+"foo", "afoo", 0)).Should.Equal(true) })
c.Specify("Should match a - if it's the first or last character or backslash-escaped", func() {
c.Then(Match("[-az]", "-", 0)).Should.Equal(true)
c.Then(Match("[-az]", "a", 0)).Should.Equal(true)
c.Then(Match("[-az]", "b", 0)).Should.Equal(false)
c.Then(Match("[az-]", "-", 0)).Should.Equal(true)
c.Then(Match("[a\\-z]", "-", 0)).Should.Equal(true)
c.Then(Match("[a\\-z]", "b", 0)).Should.Equal(false)
c.Specify("And ignore \\ when FNM_NOESCAPE is given", func() {
c.Then(Match("[a\\-z]", "\\", FNM_NOESCAPE)).Should.Equal(true)
c.Then(Match("[a\\-z]", "-", FNM_NOESCAPE)).Should.Equal(false)
})
})
c.Specify("Should be negated if starting with ^ or !", func() {
c.Then(Match("[^a-z]", "a", 0)).Should.Equal(false)
c.Then(Match("[!a-z]", "b", 0)).Should.Equal(false)
c.Then(Match("[!a-z]", "é", 0)).Should.Equal(true)
c.Then(Match("[!a-z]", "わ", 0)).Should.Equal(true)
c.Specify("And still match - if following the negation character", func() {
c.Then(Match("[^-az]", "-", 0)).Should.Equal(false)
c.Then(Match("[^-az]", "b", 0)).Should.Equal(true)
})
})
c.Specify("Should support multiple characters/ranges", func() {
c.Then(Match("[abc]", "a", 0)).Should.Equal(true)
c.Then(Match("[abc]", "c", 0)).Should.Equal(true)
c.Then(Match("[abc]", "d", 0)).Should.Equal(false)
c.Then(Match("[a-cg-z]", "c", 0)).Should.Equal(true)
c.Then(Match("[a-cg-z]", "h", 0)).Should.Equal(true)
c.Then(Match("[a-cg-z]", "d", 0)).Should.Equal(false)
})
c.Specify("Should not match / when flags is FNM_PATHNAME", func() {
c.Then(Match("[abc/def]", "/", 0)).Should.Equal(true)
c.Then(Match("[abc/def]", "/", FNM_PATHNAME)).Should.Equal(false)
c.Then(Match("[.-0]", "/", 0)).Should.Equal(true) // .-0 includes /
c.Then(Match("[.-0]", "/", FNM_PATHNAME)).Should.Equal(false)
})
c.Specify("Should normally be case-sensitive", func() {
c.Then(Match("[a-z]", "A", 0)).Should.Equal(false)
c.Then(Match("[A-Z]", "a", 0)).Should.Equal(false)
c.Specify("Except when FNM_CASEFOLD is given", func() {
c.Then(Match("[a-z]", "A", FNM_CASEFOLD)).Should.Equal(true)
c.Then(Match("[A-Z]", "a", FNM_CASEFOLD)).Should.Equal(true)
})
})
// What about [a-c-f]? How should that behave? It's undocumented.
})
c.Specify("A backslash should escape the following character", func() {
c.Then(Match("\\\\", "\\", 0)).Should.Equal(true)
c.Then(Match("\\*", "*", 0)).Should.Equal(true)
c.Then(Match("\\*", "foo", 0)).Should.Equal(false)
c.Then(Match("\\?", "?", 0)).Should.Equal(true)
c.Then(Match("\\?", "f", 0)).Should.Equal(false)
c.Then(Match("\\[a-z]", "[a-z]", 0)).Should.Equal(true)
c.Then(Match("\\[a-z]", "a", 0)).Should.Equal(false)
c.Then(Match("\\foo", "foo", 0)).Should.Equal(true)
c.Then(Match("\\わ", "わ", 0)).Should.Equal(true)
c.Specify("Unless FNM_NOESCAPE is given", func() {
c.Then(Match("\\\\", "\\", FNM_NOESCAPE)).Should.Equal(false)
c.Then(Match("\\\\", "\\\\", FNM_NOESCAPE)).Should.Equal(true)
c.Then(Match("\\*", "foo", FNM_NOESCAPE)).Should.Equal(false)
c.Then(Match("\\*", "\\*", FNM_NOESCAPE)).Should.Equal(true)
})
})
c.Specify("Literal characters should match themselves", func() {
c.Then(Match("foo", "foo", 0)).Should.Equal(true)
c.Then(Match("foo", "foobar", 0)).Should.Equal(false)
c.Then(Match("foobar", "foo", 0)).Should.Equal(false)
c.Then(Match("foo", "Foo", 0)).Should.Equal(false)
c.Then(Match("わたし", "わたし", 0)).Should.Equal(true)
c.Specify("And perform case-folding when FNM_CASEFOLD is given", func() {
c.Then(Match("foo", "FOO", FNM_CASEFOLD)).Should.Equal(true)
c.Then(Match("FoO", "fOo", FNM_CASEFOLD)).Should.Equal(true)
})
})
c.Specify("FNM_LEADING_DIR should ignore trailing /*", func() {
c.Then(Match("foo", "foo/bar", 0)).Should.Equal(false)
c.Then(Match("foo", "foo/bar", FNM_LEADING_DIR)).Should.Equal(true)
c.Then(Match("*", "foo/bar", FNM_PATHNAME)).Should.Equal(false)
c.Then(Match("*", "foo/bar", FNM_PATHNAME|FNM_LEADING_DIR)).Should.Equal(true)
})
}
@calmh
Copy link

calmh commented Aug 15, 2014

Hi! Is this available in a package somewhere, or if not is there a license under which I may use this code in my own package?

@jeffwilliams
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment