Created
July 18, 2013 23:58
-
-
Save mity/6034093 to your computer and use it in GitHub Desktop.
Unix file path pattern matching. Not as strong as regular expressions, but sufficient in many cases.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Unix file path pattern-based matching. | |
* Copyright (c) 2013 Martin Mitas | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining | |
* a copy of this software and associated documentation files (the "Software"), | |
* to deal in the Software without restriction, including without limitation | |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
* and/or sell copies of the Software, and to permit persons to whom the | |
* Software is furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
* IN THE SOFTWARE. | |
*/ | |
#include "fpattern.h" | |
#include <ctype.h> | |
#include <malloc.h> | |
#include <stdint.h> | |
#include <string.h> | |
#if 0 | |
/* You can enable this for very simple debug output. */ | |
#include <stdio.h> | |
#define TRACE(...) \ | |
do { \ | |
printf(__VA_ARGS__); \ | |
printf("\n"); \ | |
} while(0) | |
#else | |
#define TRACE(...) | |
#endif | |
#define MAX_AUTOMATON_SIZE 4096 | |
#define OP_MATCH 0 | |
#define OP_CHAR 1 | |
#define OP_CHAR_NO_CASE 2 | |
#define OP_QESTIONMARK 3 | |
#define OP_ASTERISK 4 | |
#define OP_DOUBLEASTERISK 5 | |
/* Struct for verteces of finite automaton of the compiled pattern. */ | |
typedef struct vertex_tag vertex_t; | |
struct vertex_tag { | |
char op; | |
char c; /* valid only for OP_CHAR and OP_CHAR_NO_CASE */ | |
}; | |
typedef struct automaton_tag automaton_t; | |
struct automaton_tag { | |
size_t size; | |
vertex_t verteces[1]; | |
}; | |
int | |
fpattern_compile(void** out_fpattern, const char* pattern_str, unsigned flags) | |
{ | |
const char* inptr; | |
size_t size = 0; | |
automaton_t* automaton; | |
vertex_t* outptr; | |
if(pattern_str == NULL) | |
return FPATTERN_EINVAL; | |
/* Determine size of the automaton */ | |
inptr = pattern_str; | |
while(1) { | |
size++; | |
if(*inptr == '\\') { /* escaped OP_CHAR or OP_CHAR_NO_CASE */ | |
inptr++; | |
if(*inptr != '\0') | |
inptr++; | |
else if(flags & FPATTERN_FLAG_STRICT) | |
return FPATTERN_ESYNTAX; | |
} else if(*inptr == '\0') { /* OP_MATCH */ | |
break; | |
} else if(*inptr == '?') { /* OP_QESTIONMARK */ | |
inptr++; | |
} else if(*inptr == '*') { /* OP_ASTERISK or OP_DOUBLEASTERISK */ | |
inptr++; | |
if(*inptr == '*') | |
inptr++; | |
} else { /* OP_CHAR or OP_CHAR_NO_CASE */ | |
inptr++; | |
} | |
} | |
if(size > MAX_AUTOMATON_SIZE) | |
return FPATTERN_ELIMIT; | |
automaton = (automaton_t*) malloc(sizeof(automaton_t) + size * sizeof(vertex_t)); | |
if(automaton == NULL) | |
return FPATTERN_ENOMEM; | |
automaton->size = size; | |
/* Compile the pattern */ | |
inptr = pattern_str; | |
outptr = automaton->verteces; | |
while(1) { | |
if(*inptr == '\\') { /* escaped OP_CHAR or OP_CHAR_NO_CASE */ | |
inptr++; | |
if(*inptr) { | |
if(flags & FPATTERN_FLAG_IGNORECASE) { | |
outptr->op = OP_CHAR_NO_CASE; | |
outptr->c = tolower(*inptr); | |
TRACE("OP_CHAR_NO_CASE (%c)", outptr->c); | |
} else { | |
outptr->op = OP_CHAR; | |
outptr->c = *inptr; | |
TRACE("OP_CHAR (%c)", outptr->c); | |
} | |
inptr++; | |
outptr++; | |
} else { /* Dangling '\\' on pattern end. Treat as unescaped. */ | |
outptr->op = OP_CHAR; | |
outptr->c = '\\'; | |
TRACE("OP_CHAR (%c) [dangling]", outptr->c); | |
} | |
} else if(*inptr == '\0') { /* OP_MATCH */ | |
outptr->op = OP_MATCH; | |
TRACE("OP_MATCH"); | |
break; | |
} else if(*inptr == '?') { /* OP_QESTIONMARK */ | |
inptr++; | |
outptr->op = OP_QESTIONMARK; | |
outptr++; | |
TRACE("OP_QESTIONMARK"); | |
} else if(*inptr == '*') { /* OP_ASTERISK or OP_DOUBLEASTERISK */ | |
inptr++; | |
if(*inptr == '*') { | |
inptr++; | |
outptr->op = OP_DOUBLEASTERISK; | |
TRACE("OP_DOUBLEASTERISK"); | |
} else { | |
outptr->op = OP_ASTERISK; | |
TRACE("OP_ASTERISK"); | |
} | |
outptr++; | |
} else { /* OP_CHAR or OP_CHAR_NO_CASE */ | |
if(flags & FPATTERN_FLAG_IGNORECASE) { | |
outptr->op = OP_CHAR_NO_CASE; | |
outptr->c = tolower(*inptr); | |
TRACE("OP_CHAR_NO_CASE (%c)", outptr->c); | |
} else { | |
outptr->op = OP_CHAR; | |
outptr->c = *inptr; | |
TRACE("OP_CHAR (%c)", outptr->c); | |
} | |
inptr++; | |
outptr++; | |
} | |
} | |
*out_fpattern = (void*)automaton; | |
return 0; | |
} | |
static int | |
states_push(uint16_t* states, int n, uint16_t st) | |
{ | |
int i; | |
/* Look if already pushed */ | |
for(i = 0; i < n; i++) { | |
if(states[i] == st) | |
return n; | |
} | |
states[n] = st; | |
return n+1; | |
} | |
int | |
fpattern_match(const void* pattern, const char* strx) | |
{ | |
automaton_t* automaton = (automaton_t*) pattern; | |
uint16_t* states_tmp; | |
uint16_t* states_from; | |
uint16_t* states_to; | |
int states_from_count; | |
int states_to_count; | |
int i; | |
const char* str = strx; | |
states_tmp = alloca(2 * sizeof(uint16_t) * automaton->size); | |
states_from = states_tmp; | |
states_to = states_tmp + automaton->size; | |
/* We start with the single (first) state. */ | |
states_from[0] = 0; | |
states_from_count = 1; | |
states_to_count = 0; | |
#define STATE_PUSH(state) \ | |
do { \ | |
states_to_count = states_push(states_to, states_to_count, state); \ | |
TRACE("PUSH(%d)", state); \ | |
} while(0) | |
while(1) { | |
/* Handle all states */ | |
TRACE("EXAM '%c'", *str); | |
for(i = 0; i < states_from_count; i++) { | |
int st = states_from[i]; | |
vertex_t* vertex = automaton->verteces + st; | |
switch(vertex->op) { | |
case OP_MATCH: | |
if(*str == '\0') | |
return 1; | |
break; | |
case OP_CHAR: | |
if(vertex->c == *str) | |
STATE_PUSH(st + 1); | |
break; | |
case OP_CHAR_NO_CASE: | |
if(vertex->c == tolower(*str)) | |
STATE_PUSH(st + 1); | |
break; | |
case OP_QESTIONMARK: | |
if(*str != '/') | |
STATE_PUSH(st + 1); | |
break; | |
case OP_ASTERISK: | |
if(*str != '/' && *str != '\0') | |
STATE_PUSH(st); | |
STATE_PUSH(st + 1); | |
break; | |
case OP_DOUBLEASTERISK: | |
if(*str != '\0') | |
STATE_PUSH(st); | |
STATE_PUSH(st + 1); | |
break; | |
} | |
} | |
if(states_to_count == 0) /* no match */ | |
return 0; | |
/* Swap states_in and _out */ | |
states_tmp = states_from; | |
states_from = states_to; | |
states_from_count = states_to_count; | |
states_to = states_tmp; | |
states_to_count = 0; | |
/* Handle next char in 'str' */ | |
if(*str != '\0') | |
str++; | |
} | |
} | |
void | |
fpattern_free(void* fpattern) | |
{ | |
free(fpattern); | |
} | |
#ifdef TEST | |
#include <stdio.h> | |
static void | |
check(const char* pattern, const char* path) | |
{ | |
void* comp; | |
int match; | |
fpattern_compile(&comp, pattern, FPATTERN_FLAG_IGNORECASE); | |
printf("Checking '%s' against pattern '%s': ", path, pattern); | |
if(comp == NULL) { | |
printf("error\n"); | |
return; | |
} | |
match = fpattern_match(comp, path); | |
fpattern_free(comp); | |
printf("%s\n", match ? "match" : "no match"); | |
} | |
int | |
main(int argc, char** argv) | |
{ | |
check("/a/b/c", "/a/b/c"); | |
check("/a/b/c", "/a/b/d"); | |
check("/a/b/*", "/a/b/c"); | |
check("/a/b/c*", "/a/b/cc"); | |
check("/a/b/*", "/a/b/c/d"); | |
check("/a/b/c*", "/a/b/c"); | |
check("/a/**", "/a/b/c"); | |
check("**/b/**", "/a/b/c"); | |
check("**/x/**", "/a/b/c"); | |
return 0; | |
} | |
#endif /* TEST */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Unix file path pattern-based matching. | |
* Copyright (c) 2013 Martin Mitas | |
* | |
* Permission is hereby granted, free of charge, to any person obtaining | |
* a copy of this software and associated documentation files (the "Software"), | |
* to deal in the Software without restriction, including without limitation | |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
* and/or sell copies of the Software, and to permit persons to whom the | |
* Software is furnished to do so, subject to the following conditions: | |
* | |
* The above copyright notice and this permission notice shall be included in | |
* all copies or substantial portions of the Software. | |
* | |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
* IN THE SOFTWARE. | |
*/ | |
#ifndef FPATTERN_H | |
#define FPATTERN_H | |
/* Intended for path pattern matching. | |
* | |
* Pattern operators: | |
* ? ... matches any single character except for '/'. | |
* * ... matches any number of characters except for '/'. | |
* ** ... matches any number of characters including '/' (i.e. can match nultiple path components). | |
* \c ... '\' suppresses special meaning of arbitrary character 'c'. Use "\\" to match the '\' itself. | |
* c ... any other character matches only that particular character. | |
*/ | |
/* Error codes for fpattern_compile() */ | |
#define FPATTERN_OK 0 | |
#define FPATTERN_ENOMEM (-1) | |
#define FPATTERN_EINVAL (-2) | |
#define FPATTERN_ELIMIT (-3) | |
#define FPATTERN_ESYNTAX (-4) | |
/* Flags for fpattern_compile() */ | |
#define FPATTERN_FLAG_IGNORECASE 0x0001 | |
#define FPATTERN_FLAG_STRICT 0x0002 | |
/* Compile the pattern 'pattern_str' and place it into a newly allocated | |
* buffer. On success (FPATTERN_OK), pointer to the buffer is placed into out_fpattern. | |
* It is then supposed to be freed with fpattern_free(). | |
*/ | |
int fpattern_compile(void** out_fpattern, const char* pattern_str, unsigned flags); | |
/* Check if the pattern matches the string 'str'. | |
* Returns non-zero if the pattern matches and zero if it does not. | |
*/ | |
int fpattern_match(const void* fpattern, const char* str); | |
/* Free the compiled pattern */ | |
void fpattern_free(void* fpattern); | |
#endif /* FPATTERN_H */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment