Skip to content

Instantly share code, notes, and snippets.

@airween
Last active March 7, 2020 13:01
Show Gist options
  • Save airween/8c5bfd5be011359edd918e964ca86178 to your computer and use it in GitHub Desktop.
Save airween/8c5bfd5be011359edd918e964ca86178 to your computer and use it in GitHub Desktop.
Using PCRE through CFFI - Python3 example
#!/usr/bin/python3
import cffi
import sys
import argparse
import datetime
import time
# Command-line interface: one mandatory positional (the file holding the
# pattern) and an optional path to the PCRE shared object.
aparser = argparse.ArgumentParser()
aparser.add_argument(
    "patternfrom",
    type=str,
    help="Add filename to read the PATTERN",
)
aparser.add_argument(
    "-s",
    "--sofile",
    type=str,
    help="Add path to shared object",
)
args = aparser.parse_args()

# Use the path given with -s/--sofile when present, otherwise the default
# shared object path.
sofile = (
    args.sofile
    if args.sofile is not None
    else "/lib/x86_64-linux-gnu/libpcre.so.3"
)
# load ffi
# Create the cffi handle and open the PCRE shared object chosen in `sofile`.
ffi = cffi.FFI()
lib = ffi.dlopen(sofile)
# Per the cffi documentation, dlopen(None) gives access to the standard C
# library. NOTE(review): C is not referenced again in the visible part of this
# script; it appears to exist only for the printf declaration below.
C = ffi.dlopen(None)
# used types
# C declarations handed to ffi.cdef() below: the pcre / pcre_extra types, the
# prototypes of pcre_compile, pcre_exec and pcre_study, printf, and the
# PCRE_STUDY_JIT_COMPILE constant.
# NOTE(review): the real_pcre / real_pcre_extra field layouts are written out
# by hand here rather than taken from the library's header - confirm they match
# the installed libpcre before reading or writing any field.
header_content = """
typedef unsigned char uschar;
typedef struct real_pcre {
unsigned long int magic_number;
size_t size;
const unsigned char *tables;
unsigned long int options;
unsigned short int top_bracket;
unsigned short int top_backref;
uschar first_char;
uschar req_char;
uschar code[1];
} real_pcre;
typedef struct real_pcre pcre;
typedef struct real_pcre_extra {
uschar options;
uschar start_bits[32];
} real_pcre_extra;
typedef struct real_pcre_extra pcre_extra;
pcre *pcre_compile(const char * pattern, int options,
const char ** errptr, int * erroroffset,
const unsigned char * tableptr);
int pcre_exec (const pcre *code, const pcre_extra *extra,
const char *subject, int length, int startoffset,
int options, int *ovector, int ovecsize);
pcre_extra * pcre_study(const pcre *code, int options, const char **errptr);
int printf(const char *format, ...);
#define PCRE_STUDY_JIT_COMPILE 0x0001
"""
# Register the declarations with cffi; this happens after both dlopen() calls.
ffi.cdef(header_content)
# used functions
# Short aliases for the three PCRE entry points looked up on `lib`.
pcre_compile_fn = lib.pcre_compile
pcre_exec_fn = lib.pcre_exec
pcre_study_fn = lib.pcre_study

# used variables
# ==============
# Out-parameters: PCRE stores an error-message pointer and the offset of the
# error inside the pattern through these.
errptr = ffi.new("char **", None)
erroroffset = ffi.new("int *", None)
# NULL asks pcre_compile() to use its built-in character tables. Passing a
# pointer to a freshly allocated single byte (as before) makes PCRE treat that
# byte as the start of a complete table and read past the end of it.
tableptr = ffi.NULL

# set up the pattern from file
# The `with` statement closes the file; no explicit close() is needed.
with open(args.patternfrom, "r") as p:
    pattern_raw = p.read().strip().encode("ascii")
# C copy of the pattern; cffi appends the terminating NUL for "char []".
pattern = ffi.new("char []", pattern_raw)

re = pcre_compile_fn(pattern, 0, errptr, erroroffset, tableptr)
# Check the compile result before anything else touches it: studying a NULL
# pattern is invalid.
if re == ffi.NULL:
    print("Invalid regex")
    if errptr[0] != ffi.NULL:
        # Surface the diagnostic PCRE reported instead of discarding it.
        print("pcre_compile: %s (offset %d)" % (
            ffi.string(errptr[0]).decode("ascii", "replace"),
            erroroffset[0],
        ))
    sys.exit(-2)

# Option value 1 is PCRE_STUDY_JIT_COMPILE (see the #define in the cdef text).
re_extra = pcre_study_fn(re, 1, errptr)
# NOTE(review): pextra is not passed to any call in the visible script; it is
# kept so the module-level name still exists.
pextra = ffi.new("pcre_extra *", None)
# Benchmark: run the compiled pattern against a fixed 128 KiB subject ten
# times and print the wall-clock duration of each pcre_exec() call.
subject_raw = b"1"*(128*1024) # subject for search
subject = ffi.new("char []", subject_raw)
# Length is computed once, outside the timed region. The previous
# len(ffi.string(subject)) copied the whole buffer on every iteration and that
# copy was counted as matching time.
subject_len = len(subject_raw)
# Capacity of the output vector, in ints; passed to pcre_exec as ovecsize.
ovecsize = 900
ovector = ffi.new("int[]", ovecsize)

print("Using pattern: %s" % (pattern_raw))
if len(subject_raw) > 30:
    print("Using subject: %s..." % subject_raw[0:30])
else:
    # Format with a str template like the branch above, so both branches
    # print in the same shape.
    print("Using subject: %s" % subject_raw)

for _ in range(10):
    # perf_counter() is monotonic and high resolution; the earlier
    # datetime/mktime conversion went through local time and could jump with
    # DST or clock adjustments.
    tstart = time.perf_counter()
    # rc (match count or negative PCRE error code) is not inspected here;
    # only the elapsed time is reported.
    rc = pcre_exec_fn(re, re_extra, subject, subject_len, 0, 0, ovector, ovecsize)
    tend = time.perf_counter()
    print("Time elapsed: %f" % (tend-tstart))
@mirkodziadzka-avi
Copy link

re = pcre_compile_fn(pattern, 0, errptr, erroroffset, tableptr)
re_extra = pcre_study_fn(re, 1, errptr)

instead of

re = pcre_compile_fn(pattern, 0, errptr, erroroffset, tableptr)
re_extra = pcre_study_fn(pattern, 1, errptr)

@mirkodziadzka-avi
Copy link

mirkodziadzka-avi commented Mar 7, 2020

some working C++ code (all error handling removed)

    m_pc = pcre_compile(pattern.c_str(), PCRE_DOTALL|PCRE_MULTILINE,  &errptr, &erroffset, NULL);
    m_pce = pcre_study(m_pc, PCRE_STUDY_JIT_COMPILE, &errptr);
    ...
    pcre_exec(m_pc, m_pce, input.c_str(), input.size(), offset, 0, ovector, ovector_size);

@airween
Copy link
Author

airween commented Mar 7, 2020

I see - thanks again. Fixed. The code is familiar to me... it looks like it's from libmodsecurity :).
But now it's very fast. The original pattern too. I'm still confused.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment