Skip to content

Instantly share code, notes, and snippets.

@drslump
Forked from pablito56/check_forbidden.py
Last active December 17, 2015 10:59
Show Gist options
  • Save drslump/5599280 to your computer and use it in GitHub Desktop.
Save drslump/5599280 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
#-*- coding: utf-8 -*-
u"""
Created on May 15, 2013
@author: pablito56
@license: MIT
@contact: pablito56@gmail.com
Performance comparisson of different options to check existence of forbidden
symbols in incoming strings (standard 'char in string' vs. RegExp).
It checks 1, 3, 5 or 8 forbidden characters existence in 1k, 10k or 100k words
of 15, 20 or 30 random characters length.
It tests worst case; forbidden chars are never found.
Results at the end of the file
"""
from string import ascii_letters, punctuation, digits
from random import seed, choice
import timeit
import re
from forbidden import forbidden as c_forbidden, forbidden_list as c_forbidden_list
# We check worse case; symbols will never be found
chars = ascii_letters + digits # + punctuation
seed()
forbidden_8 = (":", "/", "?", "#", "~", "=", "&", "!")
forbidden_1 = forbidden_8[0]
forbidden_1_re = re.compile(forbidden_1)
def check_forbidden():
num_times = 1000
symbols_amount = (3, 5, 8)
words_amount = (1000, 10000, 100000)
wrods_sizes = (15, 20, 30)
header = "\t| {:^10} | {:^10} | {:^10} | {:^10} |".format("# SYMBOLS", "TIME NO RE", "TIME RE", "RE SpeedUp")
log_trace = "\t| {:^10} | {:^10.5f} | {:^10.5f} | {:^10.5f} |"
for num_words in words_amount:
for size in wrods_sizes:
print "CHECKING {} WORDS OF {} CHARS LENGTH:".format(num_words, size)
words = ["".join([choice(chars) for i in xrange(size)]) for j in xrange(num_words)]
print header
def find_forbidden_1():
return c_forbidden_list(words, forbidden_1)
#return map(lambda w: c_forbidden(w, forbidden_1), words)
#return map(lambda w: forbidden_1 in w, words)
def find_forbidden_1_re():
return map(lambda w: forbidden_1_re.search(w) is not None, words)
t_std = timeit.timeit(find_forbidden_1, number=num_times)
t_res = timeit.timeit(find_forbidden_1_re, number=num_times)
print log_trace.format(1, t_std, t_res, t_std / t_res)
for num_symbols in symbols_amount:
forbidden = ''.join(forbidden_8[:num_symbols])
forbidden_re = re.compile("[" + "".join(forbidden) + "]")
def find_forbidden_N():
return c_forbidden_list(words, forbidden)
#return map(lambda w: c_forbidden(w, forbidden), words)
# return map(lambda w: any((f in w for f in forbidden)), words)
def find_forbidden_N_re():
return map(lambda w: forbidden_re.search(w) is not None, words)
t_std = timeit.timeit(find_forbidden_N, number=num_times)
t_res = timeit.timeit(find_forbidden_N_re, number=num_times)
print log_trace.format(num_symbols, t_std, t_res, t_std / t_res)
print
if __name__ == '__main__':
check_forbidden()
u"""
Results on MacBook Pro 15" 2 Ghz Intel Core i7 4Gb
CHECKING 1000 WORDS OF 15 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 0.39066 | 0.44744 | 0.87311 |
| 3 | 0.37141 | 0.58636 | 0.63341 |
| 5 | 0.37210 | 0.57790 | 0.64388 |
| 8 | 0.37863 | 0.58247 | 0.65005 |
CHECKING 1000 WORDS OF 20 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 0.39521 | 0.45214 | 0.87408 |
| 3 | 0.37573 | 0.62202 | 0.60404 |
| 5 | 0.37896 | 0.62064 | 0.61059 |
| 8 | 0.38474 | 0.62324 | 0.61733 |
CHECKING 1000 WORDS OF 30 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 0.40622 | 0.46979 | 0.86468 |
| 3 | 0.38748 | 0.70775 | 0.54748 |
| 5 | 0.39146 | 0.70474 | 0.55547 |
| 8 | 0.39701 | 0.70598 | 0.56236 |
CHECKING 10000 WORDS OF 15 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 3.83916 | 4.50477 | 0.85224 |
| 3 | 3.66388 | 5.82184 | 0.62933 |
| 5 | 3.69978 | 5.79355 | 0.63860 |
| 8 | 3.77003 | 5.80201 | 0.64978 |
CHECKING 10000 WORDS OF 20 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 3.94811 | 4.56033 | 0.86575 |
| 3 | 3.73949 | 6.53689 | 0.57206 |
| 5 | 3.88055 | 6.35791 | 0.61035 |
| 8 | 4.01171 | 6.26753 | 0.64008 |
CHECKING 10000 WORDS OF 30 CHARS LENGTH:
| # SYMBOLS | TIME NO RE | TIME RE | RE SpeedUp |
| 1 | 4.05587 | 4.68777 | 0.86520 |
| 3 | 3.92409 | 7.26555 | 0.54010 |
| 5 | 3.95718 | 7.24153 | 0.54646 |
| 8 | 4.08453 | 7.07470 | 0.57734 |
"""
#include <Python.h>
#include <string.h>
#include <stddef.h>
static PyObject *
forbidden(PyObject *self, PyObject *args)
{
const char *word;
const char *chars;
if (!PyArg_ParseTuple(args, "ss", &word, &chars))
return NULL;
if (strlen(word) != strcspn(word, chars)) {
Py_RETURN_TRUE;
}
Py_RETURN_FALSE;
}
static PyObject *
forbidden_list(PyObject *self, PyObject *args)
{
const char *chars;
PyObject *words;
PyObject *list;
PyObject *tmp;
long int idx;
long int len;
const char *word;
if (!PyArg_ParseTuple(args, "Os", &words, &chars))
return NULL;
len = PyList_Size(words);
list = PyList_New(0);
for (idx=0; idx<len; idx++) {
tmp = PyList_GetItem(words, idx);
Py_INCREF(tmp);
word = PyString_AsString(tmp);
if (strlen(word) != strcspn(word, chars)) {
PyList_Append(list, Py_True);
Py_INCREF(Py_True);
} else {
PyList_Append(list, Py_False);
Py_INCREF(Py_False);
}
Py_DECREF(tmp);
}
return list;
}
static char forbidden_docs[] =
"forbidden(word, chars): Looks for forbidden chars\n";
static char forbidden_list_docs[] =
"forbidden(words, chars): Looks for forbidden chars in the list of words\n";
static PyMethodDef forbidden_funcs[] = {
{"forbidden", (PyCFunction)forbidden, METH_VARARGS, forbidden_docs},
{"forbidden_list", (PyCFunction)forbidden_list, METH_VARARGS, forbidden_list_docs},
{NULL, NULL, 0, NULL}
};
void initforbidden(void)
{
Py_InitModule3("forbidden", forbidden_funcs, "Finds forbidden chars");
}
from distutils.core import setup, Extension
setup(name='forbidden', version='1.0', ext_modules=[Extension('forbidden', ['forbidden.c'])])
@juandebravo
Copy link

Messi!! 💃

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment