Last active
January 1, 2016 08:18
-
-
Save vodik/8116979 to your computer and use it in GitHub Desktop.
Quick file search for local pacman files db
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "boyermoore.h" | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <limits.h> | |
static inline int max(int a, int b) { return (a < b) ? b : a; } | |
static inline bool is_prefix(char *word, size_t wordlen, size_t pos) | |
{ | |
size_t suffixlen = wordlen - pos; | |
return strncmp(word, &word[pos], suffixlen) == 0; | |
} | |
static size_t suffix_length(char *word, size_t wordlen, size_t pos) | |
{ | |
size_t i; | |
for (i = 0; word[pos - i] == word[wordlen - 1 - i] && i < pos; i++); | |
return i; | |
} | |
static void make_delta1(int delta[static CHAR_MAX], char *pat, size_t patlen) | |
{ | |
for (size_t i = 0; i < CHAR_MAX; i++) | |
delta[i] = patlen; | |
for (size_t i = 0; i < patlen - 1; i++) | |
delta[(int)pat[i]] = patlen - 1 - i; | |
} | |
static void make_delta2(int *delta, char *pat, size_t patlen) | |
{ | |
size_t last_prefix_index = patlen - 1; | |
for (size_t p = patlen; p > 0; p--) { | |
if (is_prefix(pat, patlen, p)) | |
last_prefix_index = p; | |
delta[p - 1] = last_prefix_index + patlen - p; | |
} | |
for (size_t p = 0; p < patlen - 1; p++) { | |
size_t slen = suffix_length(pat, patlen, p); | |
if (pat[p - slen] != pat[patlen - 1 - slen]) | |
delta[patlen - 1 - slen] = patlen - 1 - p + slen; | |
} | |
} | |
int boyer_moore_compile(boyer_moore_t *bm, char *pat, size_t patlen) | |
{ | |
bm->delta2 = malloc(sizeof(int) * patlen); | |
bm->pattern = strndup(pat, patlen); | |
bm->length = patlen; | |
make_delta1(bm->delta1, pat, patlen); | |
make_delta2(bm->delta2, pat, patlen); | |
return 0; | |
} | |
char *boyer_moore_search(char *haystack, size_t len, boyer_moore_t *bm) | |
{ | |
size_t i = bm->length; | |
while (i < len) { | |
size_t j = bm->length; | |
for (; j > 0 && haystack[i - 1] == bm->pattern[j - 1]; --i, --j); | |
if (j == 0) | |
return &haystack[i]; | |
i += max(bm->delta1[(int)haystack[i - 1]], bm->delta2[j - 1]); | |
} | |
return NULL; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <stddef.h> | |
#include <limits.h> | |
typedef struct boyer_moore { | |
size_t length; | |
char *pattern; | |
int *delta2; | |
int delta1[CHAR_MAX]; | |
} boyer_moore_t; | |
int boyer_moore_compile(boyer_moore_t *bm, char *pat, size_t patlen); | |
char *boyer_moore_search(char *haystack, size_t len, boyer_moore_t *bm); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CFLAGS := -std=c99 \ | |
-Wall -Wextra -pedantic \ | |
-Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes \ | |
-D_GNU_SOURCE \ | |
${CFLAGS} | |
all: search | |
search: search.o boyermoore.o | |
clean: | |
${RM} search *.o | |
.PHONY: clean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <stdio.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <ftw.h> | |
#include "boyermoore.h" | |
#define _unused_ __attribute__((unused)) | |
char *pattern; | |
size_t patlen; | |
boyer_moore_t bm; | |
static void print_line(const char *line) | |
{ | |
char copy[BUFSIZ]; | |
strncpy(copy, line, strcspn(line, "\n\t")); | |
printf("FOUND: %s\n", copy); | |
} | |
static int do_ftw(const char *fpath, const struct stat _unused_ *sb, int typeflag) | |
{ | |
FILE *fp; | |
char buf[BUFSIZ], *p = buf; | |
if (typeflag != FTW_F) | |
return 0; | |
fp = fopen(fpath, "r"); | |
if (!fp) { | |
fprintf(stderr, "failed to open file\n"); | |
return 1; | |
} | |
int nbytes = fread(buf, 1, BUFSIZ, fp); | |
bool first = true; | |
while (1) { | |
char *ret = boyer_moore_search(p, nbytes, &bm); | |
if (!ret) | |
break; | |
if (ret[patlen] == '\t' || ret[patlen] == '\n') { | |
if (first) | |
printf(">>> %s\n", fpath); | |
first = false; | |
print_line(ret); | |
} | |
nbytes -= ret - p - patlen; | |
p = &ret[patlen]; | |
} | |
fclose(fp); | |
return 0; | |
} | |
int main(int argc, char *argv[]) | |
{ | |
pattern = argv[1]; | |
patlen = strlen(pattern); | |
if (argc == 1) | |
return 1; | |
if (pattern[0] == '/') { | |
++pattern; | |
--patlen; | |
} | |
boyer_moore_compile(&bm, pattern, patlen); | |
ftw("/var/lib/pacman/local", do_ftw, 1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment