Skip to content

Instantly share code, notes, and snippets.

@vodik
Last active January 1, 2016 08:18
Show Gist options
  • Save vodik/8116979 to your computer and use it in GitHub Desktop.
Save vodik/8116979 to your computer and use it in GitHub Desktop.
Quick file search for local pacman files db
#include "boyermoore.h"
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <limits.h>
static inline int max(int a, int b) { return (a < b) ? b : a; }
static inline bool is_prefix(char *word, size_t wordlen, size_t pos)
{
size_t suffixlen = wordlen - pos;
return strncmp(word, &word[pos], suffixlen) == 0;
}
static size_t suffix_length(char *word, size_t wordlen, size_t pos)
{
size_t i;
for (i = 0; word[pos - i] == word[wordlen - 1 - i] && i < pos; i++);
return i;
}
static void make_delta1(int delta[static CHAR_MAX], char *pat, size_t patlen)
{
for (size_t i = 0; i < CHAR_MAX; i++)
delta[i] = patlen;
for (size_t i = 0; i < patlen - 1; i++)
delta[(int)pat[i]] = patlen - 1 - i;
}
static void make_delta2(int *delta, char *pat, size_t patlen)
{
size_t last_prefix_index = patlen - 1;
for (size_t p = patlen; p > 0; p--) {
if (is_prefix(pat, patlen, p))
last_prefix_index = p;
delta[p - 1] = last_prefix_index + patlen - p;
}
for (size_t p = 0; p < patlen - 1; p++) {
size_t slen = suffix_length(pat, patlen, p);
if (pat[p - slen] != pat[patlen - 1 - slen])
delta[patlen - 1 - slen] = patlen - 1 - p + slen;
}
}
int boyer_moore_compile(boyer_moore_t *bm, char *pat, size_t patlen)
{
bm->delta2 = malloc(sizeof(int) * patlen);
bm->pattern = strndup(pat, patlen);
bm->length = patlen;
make_delta1(bm->delta1, pat, patlen);
make_delta2(bm->delta2, pat, patlen);
return 0;
}
char *boyer_moore_search(char *haystack, size_t len, boyer_moore_t *bm)
{
size_t i = bm->length;
while (i < len) {
size_t j = bm->length;
for (; j > 0 && haystack[i - 1] == bm->pattern[j - 1]; --i, --j);
if (j == 0)
return &haystack[i];
i += max(bm->delta1[(int)haystack[i - 1]], bm->delta2[j - 1]);
}
return NULL;
}
#pragma once
#include <stddef.h>
#include <limits.h>
typedef struct boyer_moore {
size_t length;
char *pattern;
int *delta2;
int delta1[CHAR_MAX];
} boyer_moore_t;
int boyer_moore_compile(boyer_moore_t *bm, char *pat, size_t patlen);
char *boyer_moore_search(char *haystack, size_t len, boyer_moore_t *bm);
CFLAGS := -std=c99 \
-Wall -Wextra -pedantic \
-Wshadow -Wpointer-arith -Wcast-qual -Wstrict-prototypes -Wmissing-prototypes \
-D_GNU_SOURCE \
${CFLAGS}
all: search
search: search.o boyermoore.o
clean:
${RM} search *.o
.PHONY: clean
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <ftw.h>
#include "boyermoore.h"
#define _unused_ __attribute__((unused))
char *pattern;
size_t patlen;
boyer_moore_t bm;
static void print_line(const char *line)
{
char copy[BUFSIZ];
strncpy(copy, line, strcspn(line, "\n\t"));
printf("FOUND: %s\n", copy);
}
static int do_ftw(const char *fpath, const struct stat _unused_ *sb, int typeflag)
{
FILE *fp;
char buf[BUFSIZ], *p = buf;
if (typeflag != FTW_F)
return 0;
fp = fopen(fpath, "r");
if (!fp) {
fprintf(stderr, "failed to open file\n");
return 1;
}
int nbytes = fread(buf, 1, BUFSIZ, fp);
bool first = true;
while (1) {
char *ret = boyer_moore_search(p, nbytes, &bm);
if (!ret)
break;
if (ret[patlen] == '\t' || ret[patlen] == '\n') {
if (first)
printf(">>> %s\n", fpath);
first = false;
print_line(ret);
}
nbytes -= ret - p - patlen;
p = &ret[patlen];
}
fclose(fp);
return 0;
}
int main(int argc, char *argv[])
{
pattern = argv[1];
patlen = strlen(pattern);
if (argc == 1)
return 1;
if (pattern[0] == '/') {
++pattern;
--patlen;
}
boyer_moore_compile(&bm, pattern, patlen);
ftw("/var/lib/pacman/local", do_ftw, 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment