Skip to content

Instantly share code, notes, and snippets.

@yanolab
Created December 19, 2011 09:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save yanolab/1496232 to your computer and use it in GitHub Desktop.
Save yanolab/1496232 to your computer and use it in GitHub Desktop.
pcre for pypy
""" libpcre wrapping"""
from pypy.rpython.tool import rffi_platform
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.translator.tool.cbuild import ExternalCompilationInfo
from pypy.translator.platform import platform
from pypy.tool.ansi_print import ansi_log
import py
import os
# Log producer for the "libpcre" keyword; the next line registers ansi_log
# as the consumer for that keyword.
log = py.log.Producer("libpcre")
py.log.setconsumer("libpcre", ansi_log)
# Header names passed to ExternalCompilationInfo through ``includes``.
includes = ['pcre.h']
def find_libpcre(dirs=('/usr/lib/', '/usr/local/lib',
                       '/usr/lib/i386-linux-gnu', '/usr/lib64/')):
    """Locate the PCRE library to link against.

    The directories in ``dirs`` are searched in order for the static
    archive ``libpcre.a``; the static library takes priority over the
    dynamic one.

    dirs -- iterable of directory paths to search.  The default is an
            immutable tuple so it cannot be altered between calls.

    Returns a ``(link_files, libraries)`` pair: ``([path], [])`` when the
    static archive exists in one of the directories, otherwise
    ``([], ['pcre'])`` so that the library is linked by name instead.
    """
    libname = 'libpcre.a'
    for directory in dirs:
        # Build the candidate path once and reuse it for test and result.
        candidate = os.path.join(directory, libname)
        if os.path.exists(candidate):
            log.INFO("%s found in %s" % (libname, directory))
            return ([candidate], [])
    log.warning("Cann't found libpcre.a. so use libpcre.so.")
    return ([], ['pcre'])
# find_libpcre() yields either ([path to libpcre.a], []) or ([], ['pcre']).
link_files, libraries = find_libpcre()
# Compilation info shared by the configuration class and every binding below.
eci = ExternalCompilationInfo(
    includes = includes,
    link_files = link_files,
    libraries = libraries,
    testonly_libraries = ['pcre']
    )
# type defines
# Opaque pointer to the C type named 'pcre'.
TYPE_PCREP = rffi.COpaquePtr('pcre')
# Field list declared for the C struct named 'pcre_extra'.
# NOTE(review): confirm the fields and their types against the installed pcre.h.
TYPE_PCRE_EXTRA = rffi.CStruct('pcre_extra',
                               ('flags', rffi.ULONG),
                               ('study_data', rffi.VOIDP),
                               ('match_limit', rffi.ULONG),
                               ('callout_data', rffi.VOIDP),
                               ('tables', rffi.CCHARP),
                               ('match_limit_recursion', rffi.ULONG),
                               ('mark', rffi.CCHARPP))
# Pointer type used for the pcre_extra argument of pcre_exec.
TYPE_PCRE_EXTRAP = lltype.Ptr(TYPE_PCRE_EXTRA)
# TODO: empty placeholder list; none of the visible code reads it.
# NOTE(review): the name looks like a typo for "constants" -- confirm nothing
# else refers to it before renaming.
constans = []
# TODO
class CConfig:
    """Input for rffi_platform.configure().

    Carries the compilation info to use and one ConstantInteger entry per
    C constant requested (currently only PCRE_ERROR_NOMATCH).
    """
    PCRE_ERROR_NOMATCH = rffi_platform.ConstantInteger('PCRE_ERROR_NOMATCH')
    _compilation_info_ = eci
    # TODO
class cConfig:
    """Empty namespace class; configured values are attached as attributes."""
# Copy every entry returned by rffi_platform.configure(CConfig) onto cConfig
# as a class attribute of the same name.
for k,v in rffi_platform.configure(CConfig).items():
    setattr(cConfig, k, v)
# C functions to bind, as (name, argument types, result type) triples.
# TODO: define other functions
externals = [
    ("pcre_version", [], rffi.CCHARP),
    ("pcre_compile", [rffi.CCHARP,        # pattern
                      rffi.INT,           # options
                      rffi.CCHARPP,       # errptr
                      rffi.INTP,          # erroffset
                      rffi.UCHARP,        # tableptr
                      ], TYPE_PCREP),
    ("pcre_exec", [TYPE_PCREP,            # code
                   TYPE_PCRE_EXTRAP,      # pcre_extra
                   rffi.CCHARP,           # subject
                   rffi.INT,              # length
                   rffi.INT,              # startoffset
                   rffi.INT,              # options
                   rffi.INTP,             # ovector
                   rffi.INT,              # ovecsize
                   ], rffi.INT),
    # PCRE declares pcre_free as ``void (*pcre_free)(void *)``: it returns
    # nothing, so the result type is Void rather than a void pointer.
    ("pcre_free", [rffi.VOIDP], lltype.Void)
    ]
def pcre_external(name, args, result, **kw):
    """Bind the C function ``name`` via rffi.llexternal using ``eci``.

    ``args`` and ``result`` are the argument type list and the result
    type; any extra keyword arguments are forwarded unchanged.
    """
    binding = rffi.llexternal(name, args, result,
                              compilation_info=eci, **kw)
    return binding
# Map each function name listed in ``externals`` to its binding.
_raw_apis = {}
for name, args, result in externals:
    _raw_apis[name] = pcre_external(name, args, result)
# NOTE: static cache -- pcre_version() is called once, when this module-level
# statement runs, and its result is converted with rffi.charp2str.
_pcre_version = rffi.charp2str(_raw_apis['pcre_version']())
def lib_version():
    """Return the PCRE library version string cached in ``_pcre_version``."""
    cached_version = _pcre_version
    return cached_version
def version():
    """Return the version string of this pypypcre wrapper."""
    wrapper_version = "0.0.1"
    return wrapper_version
class PCREMatch:
    """Result of a successful match, modelled on SRE_Match.

    ``groups`` is a non-empty list of ``(text, start, end)`` entries.  The
    first entry describes the whole match; every further entry describes a
    captured sub-group, of which only the text is kept.  ``start`` and
    ``end`` must be convertible with ``int()``.
    """

    def __init__(self, groups):
        whole_match = groups[0]
        captured = []
        if len(groups) > 1:
            for entry in groups[1:]:
                captured.append(entry[0])
        self._group = whole_match
        self._groups = captured

    def group(self):
        """Return the text of the whole match."""
        text = self._group[0]
        return text

    def groups(self):
        """Return the list of captured sub-group texts."""
        return self._groups

    def start(self):
        """Return the start offset of the whole match as an int."""
        begin = self._group[1]
        return int(begin)

    def end(self):
        """Return the end offset of the whole match as an int."""
        finish = self._group[2]
        return int(finish)

    def span(self):
        """Return the ``(start, end)`` offsets of the whole match."""
        begin = self.start()
        finish = self.end()
        return (begin, finish)
class PCREPattern:
    """Compiled pattern object, modelled on SRE_Pattern.

    Wraps the pointer produced by pcre_compile().

    NOTE: the compiled pattern is never released; this class does not call
    pcre_free on ``_c_pcre``.
    """

    def __init__(self, c_pcre):
        self._c_pcre = c_pcre

    def match(self, string, flags=0):
        """Run the compiled pattern against ``string`` from offset 0.

        string -- subject text.
        flags  -- accepted for signature compatibility but currently
                  unused; pcre_exec is always called with options 0.

        Returns a PCREMatch on success and None when pcre_exec reports a
        negative result.  Groups whose offsets are negative (unset) are
        skipped, so the groups that follow them move down one position.
        """
        # pcre_exec() is given room for 10 groups (the whole match plus
        # nine captures), not 30: the vector holds 10 * 3 ints.
        ovecsize = 10 * 3
        ovector = lltype.malloc(rffi.CArray(rffi.INT), ovecsize, flavor='raw')
        try:
            matched = _raw_apis['pcre_exec'](self._c_pcre,
                                             lltype.nullptr(TYPE_PCRE_EXTRAP.TO),
                                             string,
                                             len(string),
                                             0,
                                             0,
                                             ovector,
                                             ovecsize)
            if matched == 0:
                # A zero result means the match succeeded but the vector
                # was too small for all groups; the first ovecsize / 3
                # offset pairs are still valid.
                matched = ovecsize // 3
            if matched < 0:
                # TODO: negative results other than PCRE_ERROR_NOMATCH are
                # errors, but are reported as "no match" for now.
                return None
            lst = []
            for idx in range(0, matched * 2, 2):
                start_pos = ovector[idx]
                end_pos = ovector[idx + 1]
                if start_pos < 0 or end_pos < 0:
                    # Group did not take part in the match.
                    continue
                lst.append((string[start_pos:end_pos],
                            str(start_pos), str(end_pos)))
            return PCREMatch(lst)
        finally:
            # Release the raw buffer on every exit path, including the
            # early "no match" return.
            lltype.free(ovector, flavor='raw')
def compile(pattern, flags=0):
    """Compile ``pattern`` with pcre_compile() and wrap it in a PCREPattern.

    ``flags`` is passed to pcre_compile() as its options argument.  The
    error-message and error-offset outputs are allocated only for the
    duration of the call and are not inspected, so the returned pointer
    is wrapped without being checked.
    """
    err_msg_out = lltype.malloc(rffi.CArray(rffi.CCHARP), 1, flavor='raw')
    err_offset_out = lltype.malloc(rffi.CArray(rffi.INT), 1, flavor='raw')
    no_tables = lltype.nullptr(rffi.UCHARP.TO)
    compile_fn = _raw_apis['pcre_compile']
    compiled = compile_fn(pattern, flags, err_msg_out, err_offset_out,
                          no_tables)
    lltype.free(err_msg_out, flavor='raw')
    lltype.free(err_offset_out, flavor='raw')
    return PCREPattern(compiled)
def match(pattern, string, flags=0):
    """Compile ``pattern`` and match it against ``string``.

    pattern -- regular expression source.
    string  -- subject text.
    flags   -- options forwarded to compile(), so that they take effect
               when the pattern is compiled instead of being dropped.

    Returns whatever the compiled pattern's match() returns.
    """
    return compile(pattern, flags).match(string)
# -*- coding: utf-8 -*-
import pypypcre as re
import sys
def usage():
    """Print the command-line usage line and return 0."""
    usage_line = "usage: regextest pattern string"
    print(usage_line)
    return 0
def log(msg, LEVEL="INFO"):
    """Print ``msg`` prefixed by ``LEVEL`` and a colon, then return 0."""
    line = "%s:%s" % (LEVEL, msg)
    print(line)
    return 0
def error(msg):
    """Report ``msg`` through log() with the level "ERROR"."""
    log(msg, "ERROR")
def main(argv):
    """Match the pattern ``argv[1]`` against ``argv[2]`` and print the result.

    On a match, prints the matched text, its start, its end, its span and
    each captured group.  Returns 0 on a match, and 1 when the argument
    count is wrong or nothing matched.
    """
    if len(argv) != 3:
        error("invalid argments")
        usage()
        return 1
    pattern = argv[1]
    subject = argv[2]
    m = re.match(pattern, subject)
    if m is None:
        log("No matches")
        return 1
    print(m.group())
    print(m.start())
    print(m.end())
    print(m.span())
    sub_groups = m.groups()
    log("Matched group size: %d" % (len(sub_groups), ))
    for item in sub_groups:
        print(item)
    return 0
def target(*argv):
    """Return ``(main, None)``; the arguments are ignored.

    NOTE(review): presumably the hook read by the translation tool chain --
    confirm against the build instructions.
    """
    entry_point = main
    return entry_point, None
if __name__ == '__main__':
    # main() returns 0 on success and 1 on failure; hand that value to the
    # shell as the exit status instead of discarding it.
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment