Skip to content

Instantly share code, notes, and snippets.

@mity
Created July 18, 2013 23:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mity/6034093 to your computer and use it in GitHub Desktop.
Save mity/6034093 to your computer and use it in GitHub Desktop.
Unix file path pattern matching. Not as strong as regular expressions, but sufficient in many cases.
/*
* Unix file path pattern-based matching.
* Copyright (c) 2013 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "fpattern.h"
#include <ctype.h>
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#if 0
/* You can enable this for very simple debug output. */
#include <stdio.h>
#define TRACE(...) \
do { \
printf(__VA_ARGS__); \
printf("\n"); \
} while(0)
#else
#define TRACE(...)
#endif
#define MAX_AUTOMATON_SIZE 4096
#define OP_MATCH 0
#define OP_CHAR 1
#define OP_CHAR_NO_CASE 2
#define OP_QESTIONMARK 3
#define OP_ASTERISK 4
#define OP_DOUBLEASTERISK 5
/* Struct for verteces of finite automaton of the compiled pattern. */
typedef struct vertex_tag vertex_t;
struct vertex_tag {
char op;
char c; /* valid only for OP_CHAR and OP_CHAR_NO_CASE */
};
typedef struct automaton_tag automaton_t;
struct automaton_tag {
size_t size;
vertex_t verteces[1];
};
int
fpattern_compile(void** out_fpattern, const char* pattern_str, unsigned flags)
{
const char* inptr;
size_t size = 0;
automaton_t* automaton;
vertex_t* outptr;
if(pattern_str == NULL)
return FPATTERN_EINVAL;
/* Determine size of the automaton */
inptr = pattern_str;
while(1) {
size++;
if(*inptr == '\\') { /* escaped OP_CHAR or OP_CHAR_NO_CASE */
inptr++;
if(*inptr != '\0')
inptr++;
else if(flags & FPATTERN_FLAG_STRICT)
return FPATTERN_ESYNTAX;
} else if(*inptr == '\0') { /* OP_MATCH */
break;
} else if(*inptr == '?') { /* OP_QESTIONMARK */
inptr++;
} else if(*inptr == '*') { /* OP_ASTERISK or OP_DOUBLEASTERISK */
inptr++;
if(*inptr == '*')
inptr++;
} else { /* OP_CHAR or OP_CHAR_NO_CASE */
inptr++;
}
}
if(size > MAX_AUTOMATON_SIZE)
return FPATTERN_ELIMIT;
automaton = (automaton_t*) malloc(sizeof(automaton_t) + size * sizeof(vertex_t));
if(automaton == NULL)
return FPATTERN_ENOMEM;
automaton->size = size;
/* Compile the pattern */
inptr = pattern_str;
outptr = automaton->verteces;
while(1) {
if(*inptr == '\\') { /* escaped OP_CHAR or OP_CHAR_NO_CASE */
inptr++;
if(*inptr) {
if(flags & FPATTERN_FLAG_IGNORECASE) {
outptr->op = OP_CHAR_NO_CASE;
outptr->c = tolower(*inptr);
TRACE("OP_CHAR_NO_CASE (%c)", outptr->c);
} else {
outptr->op = OP_CHAR;
outptr->c = *inptr;
TRACE("OP_CHAR (%c)", outptr->c);
}
inptr++;
outptr++;
} else { /* Dangling '\\' on pattern end. Treat as unescaped. */
outptr->op = OP_CHAR;
outptr->c = '\\';
TRACE("OP_CHAR (%c) [dangling]", outptr->c);
}
} else if(*inptr == '\0') { /* OP_MATCH */
outptr->op = OP_MATCH;
TRACE("OP_MATCH");
break;
} else if(*inptr == '?') { /* OP_QESTIONMARK */
inptr++;
outptr->op = OP_QESTIONMARK;
outptr++;
TRACE("OP_QESTIONMARK");
} else if(*inptr == '*') { /* OP_ASTERISK or OP_DOUBLEASTERISK */
inptr++;
if(*inptr == '*') {
inptr++;
outptr->op = OP_DOUBLEASTERISK;
TRACE("OP_DOUBLEASTERISK");
} else {
outptr->op = OP_ASTERISK;
TRACE("OP_ASTERISK");
}
outptr++;
} else { /* OP_CHAR or OP_CHAR_NO_CASE */
if(flags & FPATTERN_FLAG_IGNORECASE) {
outptr->op = OP_CHAR_NO_CASE;
outptr->c = tolower(*inptr);
TRACE("OP_CHAR_NO_CASE (%c)", outptr->c);
} else {
outptr->op = OP_CHAR;
outptr->c = *inptr;
TRACE("OP_CHAR (%c)", outptr->c);
}
inptr++;
outptr++;
}
}
*out_fpattern = (void*)automaton;
return 0;
}
static int
states_push(uint16_t* states, int n, uint16_t st)
{
int i;
/* Look if already pushed */
for(i = 0; i < n; i++) {
if(states[i] == st)
return n;
}
states[n] = st;
return n+1;
}
int
fpattern_match(const void* pattern, const char* strx)
{
automaton_t* automaton = (automaton_t*) pattern;
uint16_t* states_tmp;
uint16_t* states_from;
uint16_t* states_to;
int states_from_count;
int states_to_count;
int i;
const char* str = strx;
states_tmp = alloca(2 * sizeof(uint16_t) * automaton->size);
states_from = states_tmp;
states_to = states_tmp + automaton->size;
/* We start with the single (first) state. */
states_from[0] = 0;
states_from_count = 1;
states_to_count = 0;
#define STATE_PUSH(state) \
do { \
states_to_count = states_push(states_to, states_to_count, state); \
TRACE("PUSH(%d)", state); \
} while(0)
while(1) {
/* Handle all states */
TRACE("EXAM '%c'", *str);
for(i = 0; i < states_from_count; i++) {
int st = states_from[i];
vertex_t* vertex = automaton->verteces + st;
switch(vertex->op) {
case OP_MATCH:
if(*str == '\0')
return 1;
break;
case OP_CHAR:
if(vertex->c == *str)
STATE_PUSH(st + 1);
break;
case OP_CHAR_NO_CASE:
if(vertex->c == tolower(*str))
STATE_PUSH(st + 1);
break;
case OP_QESTIONMARK:
if(*str != '/')
STATE_PUSH(st + 1);
break;
case OP_ASTERISK:
if(*str != '/' && *str != '\0')
STATE_PUSH(st);
STATE_PUSH(st + 1);
break;
case OP_DOUBLEASTERISK:
if(*str != '\0')
STATE_PUSH(st);
STATE_PUSH(st + 1);
break;
}
}
if(states_to_count == 0) /* no match */
return 0;
/* Swap states_in and _out */
states_tmp = states_from;
states_from = states_to;
states_from_count = states_to_count;
states_to = states_tmp;
states_to_count = 0;
/* Handle next char in 'str' */
if(*str != '\0')
str++;
}
}
void
fpattern_free(void* fpattern)
{
free(fpattern);
}
#ifdef TEST
#include <stdio.h>
static void
check(const char* pattern, const char* path)
{
void* comp;
int match;
fpattern_compile(&comp, pattern, FPATTERN_FLAG_IGNORECASE);
printf("Checking '%s' against pattern '%s': ", path, pattern);
if(comp == NULL) {
printf("error\n");
return;
}
match = fpattern_match(comp, path);
fpattern_free(comp);
printf("%s\n", match ? "match" : "no match");
}
int
main(int argc, char** argv)
{
check("/a/b/c", "/a/b/c");
check("/a/b/c", "/a/b/d");
check("/a/b/*", "/a/b/c");
check("/a/b/c*", "/a/b/cc");
check("/a/b/*", "/a/b/c/d");
check("/a/b/c*", "/a/b/c");
check("/a/**", "/a/b/c");
check("**/b/**", "/a/b/c");
check("**/x/**", "/a/b/c");
return 0;
}
#endif /* TEST */
/*
* Unix file path pattern-based matching.
* Copyright (c) 2013 Martin Mitas
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef FPATTERN_H
#define FPATTERN_H
/* Intended for path pattern matching.
*
* Pattern operators:
* ? ... matches any single character except for '/'.
* * ... matches any number of characters except for '/'.
* ** ... matches any number of characters including '/' (i.e. can match nultiple path components).
* \c ... '\' suppresses special meaning of arbitrary character 'c'. Use "\\" to match the '\' itself.
* c ... any other character matches only that particular character.
*/
/* Error codes for fpattern_compile() */
#define FPATTERN_OK 0
#define FPATTERN_ENOMEM (-1)
#define FPATTERN_EINVAL (-2)
#define FPATTERN_ELIMIT (-3)
#define FPATTERN_ESYNTAX (-4)
/* Flags for fpattern_compile() */
#define FPATTERN_FLAG_IGNORECASE 0x0001
#define FPATTERN_FLAG_STRICT 0x0002
/* Compile the pattern 'pattern_str' and place it into a newly allocated
* buffer. On success (FPATTERN_OK), pointer to the buffer is placed into out_fpattern.
* It is then supposed to be freed with fpattern_free().
*/
int fpattern_compile(void** out_fpattern, const char* pattern_str, unsigned flags);
/* Check if the pattern matches the string 'str'.
* Returns non-zero if the pattern matches and zero if it does not.
*/
int fpattern_match(const void* fpattern, const char* str);
/* Free the compiled pattern */
void fpattern_free(void* fpattern);
#endif /* FPATTERN_H */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment