-
-
Save airween/8c5bfd5be011359edd918e964ca86178 to your computer and use it in GitHub Desktop.
Using PCRE through CFFI - Python3 example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
"""Time repeated ``pcre_exec`` calls made through CFFI (ABI mode).

Usage: <script> PATTERNFILE [-s /path/to/libpcre.so]

The pattern is read from PATTERNFILE, compiled with ``pcre_compile``,
studied with JIT compilation requested, and then matched ten times against
a 128 KiB subject made only of the character ``1``.  The wall-clock
duration of every ``pcre_exec`` call is printed.
"""
import argparse
import datetime  # no longer used for timing; file-level import kept on purpose
import sys
import time

import cffi

# Shared object used when -s/--sofile is not given.
DEFAULT_SOFILE = "/lib/x86_64-linux-gnu/libpcre.so.3"

# Values taken from pcre.h.
PCRE_STUDY_JIT_COMPILE = 0x0001
PCRE_ERROR_NOMATCH = -1

# Number of ints in the output vector handed to pcre_exec (a multiple of 3).
OVECTOR_SIZE = 900

# Subject for the search.
SUBJECT_RAW = b"1" * (128 * 1024)

# How many timed pcre_exec calls are made.
RUNS = 10

# Only pointers to pcre / pcre_extra are ever passed around, so both types
# are declared opaque instead of guessing at the library's struct layout.
HEADER_CONTENT = """
typedef struct real_pcre pcre;
typedef struct pcre_extra pcre_extra;

pcre *pcre_compile(const char *pattern, int options,
                   const char **errptr, int *erroroffset,
                   const unsigned char *tableptr);
int pcre_exec(const pcre *code, const pcre_extra *extra,
              const char *subject, int length, int startoffset,
              int options, int *ovector, int ovecsize);
pcre_extra *pcre_study(const pcre *code, int options, const char **errptr);
"""


def main():
    """Parse the command line, compile the pattern and print the timings.

    Exits with status -2 when ``pcre_compile`` rejects the pattern.
    """
    aparser = argparse.ArgumentParser()
    aparser.add_argument("patternfrom", type=str, help="Add filename to read the PATTERN")
    aparser.add_argument("-s", "--sofile", type=str, help="Add path to shared object")
    args = aparser.parse_args()

    sofile = DEFAULT_SOFILE if args.sofile is None else args.sofile

    # load ffi
    ffi = cffi.FFI()
    ffi.cdef(HEADER_CONTENT)
    lib = ffi.dlopen(sofile)

    # Out-parameters filled in by pcre_compile / pcre_study.
    errptr = ffi.new("const char **")
    erroroffset = ffi.new("int *")

    # set up the pattern from file
    with open(args.patternfrom, "r") as p:
        pattern_raw = p.read().strip().encode("ascii")
    # Keep a reference to the buffer for as long as the C side may read it.
    pattern = ffi.new("char []", pattern_raw)

    # tableptr must be NULL to select PCRE's built-in character tables;
    # a freshly allocated one-byte buffer is NOT a valid table.
    compiled = lib.pcre_compile(pattern, 0, errptr, erroroffset, ffi.NULL)
    if compiled == ffi.NULL:
        print("Invalid regex")
        if errptr[0] != ffi.NULL:
            reason = ffi.string(errptr[0]).decode("ascii", "replace")
            print("%s (at offset %d)" % (reason, erroroffset[0]), file=sys.stderr)
        sys.exit(-2)

    # Study only a successfully compiled pattern.  A NULL result is not
    # necessarily an error; pcre_study reports errors through errptr.
    studied = lib.pcre_study(compiled, PCRE_STUDY_JIT_COMPILE, errptr)
    if errptr[0] != ffi.NULL:
        reason = ffi.string(errptr[0]).decode("ascii", "replace")
        print("pcre_study failed: %s" % reason, file=sys.stderr)

    subject = ffi.new("char []", SUBJECT_RAW)
    # Computed once so that no 128 KiB copy happens inside the timed region.
    subject_len = len(SUBJECT_RAW)
    ovector = ffi.new("int[%d]" % OVECTOR_SIZE)

    print("Using pattern: %r" % (pattern_raw,))
    if subject_len > 30:
        print("Using subject: %r..." % (SUBJECT_RAW[0:30],))
    else:
        print("Using subject: %r" % (SUBJECT_RAW,))

    for _ in range(RUNS):
        # perf_counter is monotonic and high resolution, unlike wall-clock
        # datetime values pushed through time.mktime.
        tstart = time.perf_counter()
        rc = lib.pcre_exec(compiled, studied, subject, subject_len, 0, 0,
                           ovector, OVECTOR_SIZE)
        tend = time.perf_counter()
        print("Time elapsed: %f" % (tend - tstart))
        # A fast run may simply be an early failure (e.g. a resource limit),
        # so surface every error code other than "no match".
        if rc < PCRE_ERROR_NOMATCH:
            print("pcre_exec returned error code %d" % rc, file=sys.stderr)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I see - thanks again. Fixed. The code is familiar to me... looks like it's from the libmodsecurity :).
But now it's very fast. The original pattern too. I'm still confused.