Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
voldmar@work ~/temp % python rus.py
TEXT_RU
9.3005130291
6.33021712303
0.310887098312
0.146034002304
0.112301826477
TEXT_EN
7.99509000778
4.40287303925
1.02460694313
22.2835221291
8.09571194649
# coding: utf-8
from timeit import timeit
import re
import itertools as it
import operator as op
# Every letter of the Russian alphabet: 33 uppercase followed by 33 lowercase.
# Kept in a single literal so the regex and the set below cannot drift apart.
_RUSSIAN_LETTERS = (
    u'ЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮЁ'
    u'йцукенгшщзхъэждлорпавыфячсмитьбюё'
)
# Regex character class that matches any single Russian letter.
RUSSIAN_ALPHA = u'[' + _RUSSIAN_LETTERS + u']'
# The same letters as an immutable set, for direct membership tests.
RUSSIAN_ALPHA_SET = frozenset(_RUSSIAN_LETTERS)
def is_russian(s):
    """Return True if *s* contains at least one Russian letter.

    Filters *s* down to the characters found in RUSSIAN_ALPHA_SET and
    reports whether the filtered result is truthy.

    NOTE(review): this relies on filter() returning a sequence, as it does
    on Python 2; presumably on Python 3 the lazy filter object would always
    be truthy -- confirm before porting.
    """
    kept = filter(RUSSIAN_ALPHA_SET.__contains__, s)
    return bool(kept)
def is_russian2(s):
    """Return True if *s* shares at least one character with RUSSIAN_ALPHA_SET.

    Builds a set from the characters of *s* and intersects it with the
    alphabet set; a non-empty intersection means a Russian letter is present.
    """
    common = RUSSIAN_ALPHA_SET & set(s)
    return bool(common)
def is_russian3(s):
    """Return True if the RUSSIAN_ALPHA regex matches anywhere in *s*."""
    match = re.search(RUSSIAN_ALPHA, s)
    # re.search returns None when nothing matches, a match object otherwise.
    return match is not None
def is_russian4(s):
    """Return True if *s* contains at least one Russian letter.

    Uses itertools.dropwhile to skip the leading run of characters that are
    not in RUSSIAN_ALPHA_SET; if anything is left, a Russian letter was found.
    """
    def not_russian(ch):
        return ch not in RUSSIAN_ALPHA_SET

    found = False
    for _ in it.dropwhile(not_russian, s):
        # The first item yielded is the first Russian letter; stop here.
        found = True
        break
    return found
def is_russian5(s):
    """Return True if *s* contains at least one Russian letter.

    Uses itertools.ifilter to lazily yield the characters of *s* that are in
    RUSSIAN_ALPHA_SET and stops at the first one produced.
    """
    found = False
    for _ in it.ifilter(RUSSIAN_ALPHA_SET.__contains__, s):
        # One hit is enough; do not consume the rest of the string.
        found = True
        break
    return found
# Sample input that consists mostly of Russian letters.
TEXT_RU = u'''Особенность рекламы масштабирует охват аудитории, не считаясь с
затратами. Позиционирование на рынке концентрирует поведенческий таргетинг,
отвоевывая рыночный сегмент. Несмотря на сложности, департамент маркетинга и
продаж допускает выставочный стенд, не считаясь с затратами.
Маркетингово-ориентированное издание, пренебрегая деталями, конструктивно.
Соц-дем характеристика аудитории, пренебрегая деталями, концентрирует
креативный пресс-клиппинг, осознавая социальную ответственность бизнеса.'''
# Sample input that contains no Russian letters at all.
TEXT_EN = u'''Lorem Ipsum is simply dummy text of the printing and typesetting
industry. Lorem Ipsum has been the industry's standard dummy text ever since
the 1500s, when an unknown printer took a galley of type and scrambled it to
make a type specimen book. It has survived not only five centuries, but also
the leap into electronic typesetting, remaining essentially unchanged. It was
popularised in the 1960s with the release of Letraset sheets containing Lorem
Ipsum passages, and more recently with desktop publishing software like Aldus
PageMaker including versions of Lorem Ipsum.'''
# Trim both samples to the length of the shorter one so the two inputs have
# equal size.  (Taking max() here turned both slices into no-ops and left the
# texts at different lengths.)
LEN = min(len(TEXT_RU), len(TEXT_EN))
TEXT_RU, TEXT_EN = TEXT_RU[:LEN], TEXT_EN[:LEN]
if __name__ == '__main__':
    # The timeit setup strings import from __main__, so the benchmark can only
    # run when this file is executed as a script, not when it is imported.
    _FUNC_NAMES = (
        'is_russian',
        'is_russian2',
        'is_russian3',
        'is_russian4',
        'is_russian5',
    )
    for text_name in ('TEXT_RU', 'TEXT_EN'):
        # Header line, then one timing per implementation in definition order.
        print(text_name)
        for func_name in _FUNC_NAMES:
            stmt = '%s(%s)' % (func_name, text_name)
            setup = 'from __main__ import %s, %s' % (func_name, text_name)
            # Single-argument print(...) prints the same on Python 2 and 3.
            print(timeit(stmt, setup, number=100000))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment