Created
December 19, 2011 09:11
-
-
Save yanolab/1496232 to your computer and use it in GitHub Desktop.
pcre for pypy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" libpcre wrapping""" | |
from pypy.rpython.tool import rffi_platform | |
from pypy.rpython.lltypesystem import lltype, rffi | |
from pypy.translator.tool.cbuild import ExternalCompilationInfo | |
from pypy.translator.platform import platform | |
from pypy.tool.ansi_print import ansi_log | |
import py | |
import os | |
# Module logger: messages produced under the "libpcre" keyword are handed
# to the ansi_log consumer.
log = py.log.Producer("libpcre")
py.log.setconsumer("libpcre", ansi_log)

# C header files needed for the PCRE declarations.
includes = ['pcre.h']
def find_libpcre(dirs=('/usr/lib/', '/usr/local/lib',
                       '/usr/lib/i386-linux-gnu', '/usr/lib64/')):
    """Locate the PCRE library to link against.

    The static archive ``libpcre.a`` takes priority over the shared
    library: every directory in *dirs* is probed in order and the first
    one containing the archive wins.

    :param dirs: iterable of directory paths to search (an immutable
        tuple by default, so the default can never be modified between
        calls).
    :return: a ``(link_files, libraries)`` pair -- ``([path], [])`` when
        the static archive was found, ``([], ['pcre'])`` otherwise so the
        linker falls back to the shared library.
    """
    libname = 'libpcre.a'
    for directory in dirs:
        # Build the candidate path once and reuse it for test and result.
        candidate = os.path.join(directory, libname)
        if os.path.exists(candidate):
            log.INFO("%s found in %s" % (libname, directory))
            return ([candidate], [])
    log.warning("Cann't found libpcre.a. so use libpcre.so.")
    return ([], ['pcre'])
# Resolved once at import time: either a static archive to link directly or
# the name of the shared library.  To force dynamic linking, use
# ([], ["pcre"]) instead of calling find_libpcre().
link_files, libraries = find_libpcre()

# Compilation settings shared by every external declared in this module.
eci = ExternalCompilationInfo(
    includes=includes,
    link_files=link_files,
    libraries=libraries,
    testonly_libraries=['pcre'],
)
# ---- low-level type definitions ----

# Opaque pointer to the C type ``pcre``.
TYPE_PCREP = rffi.COpaquePtr('pcre')

# Field-by-field description of the C struct ``pcre_extra``.
TYPE_PCRE_EXTRA = rffi.CStruct(
    'pcre_extra',
    ('flags', rffi.ULONG),
    ('study_data', rffi.VOIDP),
    ('match_limit', rffi.ULONG),
    ('callout_data', rffi.VOIDP),
    ('tables', rffi.CCHARP),
    ('match_limit_recursion', rffi.ULONG),
    ('mark', rffi.CCHARPP),
)
TYPE_PCRE_EXTRAP = lltype.Ptr(TYPE_PCRE_EXTRA)

# TODO: placeholder list; it is empty and not referenced by the code
# visible in this file.
constans = []
# TODO
class CConfig:
    """Description of the C-level constants to resolve.

    The class is handed to ``rffi_platform.configure``; each
    ``ConstantInteger`` attribute names a constant to look up using the
    compilation settings in ``_compilation_info_``.
    """

    _compilation_info_ = eci

    PCRE_ERROR_NOMATCH = rffi_platform.ConstantInteger('PCRE_ERROR_NOMATCH')
    # TODO
class cConfig:
    """Namespace that receives the values resolved from ``CConfig``."""


# Copy every entry returned by the configure step onto cConfig as an
# attribute of the same name.
for k, v in rffi_platform.configure(CConfig).items():
    setattr(cConfig, k, v)
# Table of the PCRE entry points wrapped by this module.  Each entry is
# (C function name, argument types, result type).
# TODO: define other functions
externals = [
    ("pcre_version", [], rffi.CCHARP),
    ("pcre_compile", [rffi.CCHARP,        # pattern
                      rffi.INT,           # options
                      rffi.CCHARPP,       # errptr
                      rffi.INTP,          # erroffset
                      rffi.UCHARP,        # tableptr
                      ], TYPE_PCREP),
    ("pcre_exec", [TYPE_PCREP,            # code
                   TYPE_PCRE_EXTRAP,      # pcre_extra
                   rffi.CCHARP,           # subject
                   rffi.INT,              # length
                   rffi.INT,              # startoffset
                   rffi.INT,              # options
                   rffi.INTP,             # ovector
                   rffi.INT,              # ovecsize
                   ], rffi.INT),
    # PCRE declares ``void (*pcre_free)(void *)``: nothing is returned, so
    # the result type is Void rather than a pointer.
    # NOTE(review): pcre_free is a function-pointer variable in libpcre,
    # not a plain function -- confirm llexternal handles it before the
    # first real call.
    ("pcre_free", [rffi.VOIDP], lltype.Void),
]
def pcre_external(name, args, result, **kw):
    """Declare the C function *name* through ``rffi.llexternal``.

    *args* and *result* are the low-level argument and result types; any
    extra keyword arguments are forwarded unchanged.  The module-wide
    ``eci`` is always supplied as ``compilation_info``.
    """
    return rffi.llexternal(
        name,
        args,
        result,
        compilation_info=eci,
        **kw
    )
# Map each C function name listed in ``externals`` to its callable wrapper.
_raw_apis = dict([(name, pcre_external(name, args, result))
                  for name, args, result in externals])
# NOTE: static cache -- pcre_version() is called once, when this module is
# imported, and its result is kept as a Python string.
_pcre_version = rffi.charp2str(_raw_apis['pcre_version']())


def lib_version():
    """Return the cached version string reported by the PCRE library."""
    return _pcre_version
def version():
    """Return the version string of this pypypcre wrapper."""
    return "0.0.1"
class PCREMatch:
    """Result of a successful match, modelled on ``SRE_Match``.

    An instance is built from a list of ``(text, start, end)`` triples.
    The first triple describes the whole match; the text of every
    following triple is kept as a captured group.  ``start`` and ``end``
    are passed through ``int()`` when they are read back.
    """

    def __init__(self, groups):
        whole = groups[0]
        captured = []
        if len(groups) > 1:
            for entry in groups[1:]:
                captured.append(entry[0])
        self._group = whole
        self._groups = captured

    def groups(self):
        """Return the list of captured group texts."""
        return self._groups

    def group(self):
        """Return the text of the whole match."""
        return self._group[0]

    def start(self):
        """Return the start position of the whole match as an int."""
        return int(self._group[1])

    def end(self):
        """Return the end position of the whole match as an int."""
        return int(self._group[2])

    def span(self):
        """Return the ``(start, end)`` positions of the whole match."""
        first = self.start()
        last = self.end()
        return (first, last)
class PCREPattern:
    """Compiled pattern, modelled on ``SRE_Pattern``.

    Wraps the raw handle returned by ``pcre_compile``.
    """

    def __init__(self, c_pcre):
        """Keep the raw compiled-pattern handle *c_pcre*."""
        self._c_pcre = c_pcre

    # NOTE: the compiled pattern is currently never released.  A finalizer
    # was sketched but left disabled:
    # def __del__(self):
    #     _raw_apis['pcre_free'](self._c_pcre)

    def match(self, string, flags=0):
        """Run the pattern against *string*, like ``SRE_Pattern.match``.

        :param string: subject text handed to ``pcre_exec``.
        :param flags: accepted for interface compatibility; it is not
            forwarded (``pcre_exec`` is always called with options 0).
        :return: a ``PCREMatch`` when ``pcre_exec`` returns a positive
            count, otherwise ``None``.  Pairs with a negative offset
            (groups that did not take part in the match) are skipped.
        """
        # Max grouping size is 10, not 30: the vector holds
        # (start, end) offset pairs.
        ovecsize = 10 * 3
        ovector = lltype.malloc(rffi.CArray(rffi.INT), ovecsize + 1,
                                flavor='raw')
        # The raw buffer is not garbage collected, so it must be released
        # on every exit path -- including the "no match" early return.
        try:
            matched = _raw_apis['pcre_exec'](
                self._c_pcre,
                lltype.nullptr(TYPE_PCRE_EXTRAP.TO),
                string,
                len(string),
                0,
                0,
                ovector,
                ovecsize)
            # TODO: a negative value other than "no match" is an internal
            # error and should be reported instead of being treated as a
            # failed match.
            if matched <= 0:
                return None
            lst = []
            for idx in range(0, matched * 2, 2):
                start_pos = ovector[idx]
                end_pos = ovector[idx + 1]
                if start_pos < 0 or end_pos < 0:
                    continue
                lst.append((string[start_pos:end_pos],
                            str(start_pos), str(end_pos)))
            return PCREMatch(lst)
        finally:
            lltype.free(ovector, flavor='raw')
def compile(pattern, flags=0):
    """Compile *pattern* into a ``PCREPattern``.

    :param pattern: regular-expression source text.
    :param flags: option bits handed to ``pcre_compile``.
    :return: a ``PCREPattern`` wrapping the compiled handle.
    :raises ValueError: when ``pcre_compile`` returns a null handle; the
        message carries the error text and offset reported by PCRE.
    """
    errptr = lltype.malloc(rffi.CArray(rffi.CCHARP), 1, flavor='raw')
    erroffset = lltype.malloc(rffi.CArray(rffi.INT), 1, flavor='raw')
    try:
        c_pcre = _raw_apis['pcre_compile'](pattern, flags, errptr, erroffset,
                                           lltype.nullptr(rffi.UCHARP.TO))
        if not c_pcre:
            # Read the diagnostics before the out-parameters are freed.
            message = rffi.charp2str(errptr[0])
            offset = rffi.cast(lltype.Signed, erroffset[0])
            raise ValueError("pcre_compile failed at offset %d: %s"
                             % (offset, message))
    finally:
        # Raw allocations must be released on success and failure alike.
        lltype.free(errptr, flavor='raw')
        lltype.free(erroffset, flavor='raw')
    return PCREPattern(c_pcre)
def match(pattern, string, flags=0):
    """Compile *pattern* with *flags* and match it against *string*.

    :param pattern: regular-expression source text.
    :param string: subject text.
    :param flags: compile-time option bits; they are given to
        ``compile`` so that they actually take effect.
    :return: whatever ``PCREPattern.match`` returns for *string*.
    """
    return compile(pattern, flags).match(string)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import pypypcre as re | |
import sys | |
def usage():
    """Print the command-line synopsis and return 0."""
    print("usage: regextest pattern string")
    return 0
def log(msg, LEVEL="INFO"):
    """Print *msg* prefixed with *LEVEL* and a colon; always return 0."""
    line = "%s:%s" % (LEVEL, msg)
    print(line)
    return 0
def error(msg):
    """Print *msg* through ``log`` with the level set to ``ERROR``."""
    log(msg, "ERROR")
def main(argv):
    """Match ``argv[1]`` (pattern) against ``argv[2]`` and print the result.

    Returns 1 when the argument count is wrong or nothing matched, and 0
    after the match details have been printed.
    """
    if len(argv) != 3:
        error("invalid argments")
        usage()
        return 1

    pattern, subject = argv[1], argv[2]
    m = re.match(pattern, subject)
    if m is None:
        log("No matches")
        return 1

    # Whole match first: text, start, end, then the (start, end) pair.
    print(m.group())
    print(m.start())
    print(m.end())
    print(m.span())

    # Then the number of captured groups followed by each group's text.
    captured = m.groups()
    log("Matched group size: %d" % (len(captured), ))
    for item in captured:
        print(item)
    return 0
def target(*argv):
    """Return the pair ``(main, None)``; the arguments are ignored.

    NOTE(review): presumably the hook looked up by the PyPy translation
    driver to find the entry point -- confirm.
    """
    entry_point = main
    return entry_point, None
if __name__ == '__main__':
    # main() returns 0 or 1; hand it to sys.exit so the process exit
    # status reflects it instead of being discarded.
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment