Last active
November 1, 2019 14:01
-
-
Save robertknight/055d19c61c5dbe2c1169a54e1da7f238 to your computer and use it in GitHub Desktop.
Optimized charmap generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import time
import unicodedata
def gen_charmap_new(max_codepoint=sys.maxunicode):
    """Build the category -> code point ranges map in two passes.

    The first pass walks every code point from 0 to ``max_codepoint`` and
    records a ``(category, start, end)`` run each time the value returned by
    ``unicodedata.category`` changes. The second pass groups those runs by
    category.

    Args:
        max_codepoint: Highest code point (inclusive) to scan. Defaults to
            ``sys.maxunicode``, i.e. the full range ``chr`` accepts.

    Returns:
        A dict mapping each category string to a list of ``[start, end]``
        lists (both bounds inclusive), in ascending code point order.

    Raises:
        ValueError: Raised by ``chr`` if ``max_codepoint`` exceeds
            ``sys.maxunicode``.
    """
    current_cat = None
    current_cat_start = None
    ranges = []
    for i in range(max_codepoint + 1):
        cat = unicodedata.category(chr(i))
        if cat != current_cat:
            # The category changed: close the previous run (if any) and
            # start a new one at this code point.
            if current_cat is not None:
                ranges.append((current_cat, current_cat_start, i - 1))
            current_cat = cat
            current_cat_start = i

    # Close the final run; skipped only when nothing was scanned at all
    # (a negative max_codepoint), so no (None, None, ...) entry is emitted.
    if current_cat is not None:
        ranges.append((current_cat, current_cat_start, max_codepoint))

    tmp_charmap = {}
    for cat, start, end in ranges:
        tmp_charmap.setdefault(cat, []).append([start, end])
    return tmp_charmap
def gen_charmap_old(max_codepoint=sys.maxunicode):
    """Build the category -> code point ranges map in a single pass.

    For every code point from 0 to ``max_codepoint`` the category's range
    list is looked up; its last range is extended when the code point is
    adjacent to it, otherwise a new single-element range is appended.

    Args:
        max_codepoint: Highest code point (inclusive) to scan. Defaults to
            ``sys.maxunicode``, i.e. the full range ``chr`` accepts.

    Returns:
        A dict mapping each category string to a list of ``[start, end]``
        lists (both bounds inclusive), in ascending code point order.

    Raises:
        ValueError: Raised by ``chr`` if ``max_codepoint`` exceeds
            ``sys.maxunicode``.
    """
    tmp_charmap = {}
    for i in range(max_codepoint + 1):
        cat = unicodedata.category(chr(i))
        rs = tmp_charmap.setdefault(cat, [])
        if rs and rs[-1][-1] == i - 1:
            # Code point directly follows the last range: grow it in place.
            rs[-1][-1] += 1
        else:
            rs.append([i, i])
    return tmp_charmap
# Time both generators over the full code point range and check that they
# produce the same map. perf_counter() is used instead of time() because it
# is monotonic and has the highest available resolution for short intervals.
t1 = time.perf_counter()
cm1 = gen_charmap_old()
t2 = time.perf_counter()
cm2 = gen_charmap_new()
t3 = time.perf_counter()

print('Charmaps equal', cm1 == cm2)
print('Generating charmap (old) took', t2 - t1)
print('Generating charmap (new method) took', t3 - t2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[I] ~/o/h/hypothesis-python (vi)> time python3 gencharmap.py
Charmaps equal True
Generating charmap (old) took 0.869232177734375
Generating charmap (new method) took 0.4008798599243164
1.38 real 1.31 user 0.04 sys
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment