Created
August 24, 2018 10:04
-
-
Save lahwaacz/3d59ebbf0cc35c6bbf686f93e0341727 to your computer and use it in GitHub Desktop.
Benchmark for https://github.com/earwig/mwparserfromhell/issues/195
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
import re | |
from itertools import chain | |
import mwparserfromhell | |
# Default regex flags handed to the matcher by indexed_ifilter() and
# parented_ifilter(): case-insensitive, "." also matches newlines, and
# Unicode-aware character classes.
FLAGS = re.UNICODE | re.DOTALL | re.IGNORECASE
def indexed_ifilter(wikicode, recursive=True, matches=None, flags=FLAGS,
                    forcetype=None):
    """Yield ``(parent, i, node)`` for every node accepted by the filter.

    ``recursive``, ``matches``, ``flags`` and ``forcetype`` have the same
    meaning as in :meth:`ifilter`. In each yielded triple ``parent`` is the
    wikicode object that directly contains ``node``, and ``i`` is the
    position of ``node`` inside ``parent`` at the moment it is yielded
    (``parent.index(node) == i``).
    """
    accepts = wikicode._build_matcher(matches, flags)

    if recursive:
        # Only RECURSE_OTHERS restricts the descent to the forced type.
        limit = forcetype if recursive == wikicode.RECURSE_OTHERS else None

        def walk(top_node):
            # Visit everything reachable from one top-level node and pair
            # each visited node with its index in its own direct parent.
            for owner, child in wikicode._get_children(
                    top_node, restrict=limit, contexts=True, parent=wikicode):
                yield (owner, owner.index(child), child)

        # One walker per top-level node is created up front, so the set of
        # top-level nodes is fixed before any result is produced.
        walkers = [walk(top_node) for top_node in wikicode.nodes]
        candidates = chain.from_iterable(walkers)
    else:
        candidates = (
            (wikicode, position, node)
            for position, node in enumerate(wikicode.nodes)
        )

    for parent, position, node in candidates:
        # Skip nodes of the wrong type before consulting the matcher.
        if forcetype and not isinstance(node, forcetype):
            continue
        if accepts(node):
            yield (parent, position, node)
def parented_ifilter(wikicode, recursive=True, matches=None, flags=FLAGS,
                     forcetype=None):
    """Yield ``(parent, node)`` for every node accepted by the filter.

    ``recursive``, ``matches``, ``flags`` and ``forcetype`` have the same
    meaning as in :meth:`ifilter`. In each yielded pair ``parent`` is the
    wikicode object that directly contains ``node``.
    """
    accepts = wikicode._build_matcher(matches, flags)

    if recursive:
        # Only RECURSE_OTHERS restricts the descent to the forced type.
        limit = forcetype if recursive == wikicode.RECURSE_OTHERS else None

        def walk(top_node):
            # Visit everything reachable from one top-level node together
            # with the wikicode that directly holds each visited node.
            for owner, child in wikicode._get_children(
                    top_node, restrict=limit, contexts=True, parent=wikicode):
                yield (owner, child)

        # One walker per top-level node is created up front, so the set of
        # top-level nodes is fixed before any result is produced.
        walkers = [walk(top_node) for top_node in wikicode.nodes]
        candidates = chain.from_iterable(walkers)
    else:
        candidates = ((wikicode, top_node) for top_node in wikicode.nodes)

    for parent, node in candidates:
        # Skip nodes of the wrong type before consulting the matcher.
        if forcetype and not isinstance(node, forcetype):
            continue
        if accepts(node):
            yield (parent, node)
# NOTE(review): ``profile`` is not defined in this file -- presumably it is
# injected by a line profiler (e.g. kernprof); confirm how the script is run.
@profile
def expand_1(wikicode):
    """Replace each template in ``wikicode`` using ``wikicode.replace``.

    A template that has a parameter ``1`` is replaced by that parameter's
    value, after the value itself has been expanded the same way; a template
    without it is replaced by the empty string. ``wikicode`` is modified in
    place.
    """
    for tpl in wikicode.ifilter_templates(recursive=wikicode.RECURSE_OTHERS):
        if not tpl.has(1):
            wikicode.replace(tpl, "")
            continue
        first_value = tpl.get(1).value
        # Expand templates nested inside the value before substituting it.
        expand_1(first_value)
        wikicode.replace(tpl, first_value)
# NOTE(review): ``profile`` is not defined in this file -- presumably it is
# injected by a line profiler (e.g. kernprof); confirm how the script is run.
@profile
def expand_2(wikicode):
    """Replace each template in ``wikicode`` by index within its parent.

    Uses :func:`indexed_ifilter` to obtain the direct parent and position of
    every template, then pops the template and inserts its replacement at
    that position. The replacement is the (recursively expanded) value of
    parameter ``1`` when present, otherwise the empty string. ``wikicode``
    is modified in place.
    """
    template_type = mwparserfromhell.nodes.template.Template
    hits = indexed_ifilter(wikicode, forcetype=template_type,
                           recursive=wikicode.RECURSE_OTHERS)
    for parent, position, tpl in hits:
        substitute = ""
        if tpl.has(1):
            substitute = tpl.get(1).value
            # Expand templates nested inside the value before substituting.
            expand_2(substitute)
        # The reported index must still point at this very template.
        assert parent.get(position) is tpl, (parent.nodes[position], position, tpl)
        parent.nodes.pop(position)
        parent.insert(position, substitute)
# NOTE(review): ``profile`` is not defined in this file -- presumably it is
# injected by a line profiler (e.g. kernprof); confirm how the script is run.
@profile
def expand_3(wikicode):
    """Replace each template in ``wikicode`` through its direct parent.

    Uses :func:`parented_ifilter` to obtain the direct parent of every
    template and calls ``parent.replace(..., recursive=False)`` on it. The
    replacement is the (recursively expanded) value of parameter ``1`` when
    present, otherwise the empty string. ``wikicode`` is modified in place.
    """
    template_type = mwparserfromhell.nodes.template.Template
    hits = parented_ifilter(wikicode, forcetype=template_type,
                            recursive=wikicode.RECURSE_OTHERS)
    for parent, tpl in hits:
        substitute = ""
        if tpl.has(1):
            substitute = tpl.get(1).value
            # Expand templates nested inside the value before substituting.
            expand_3(substitute)
        parent.replace(tpl, substitute, recursive=False)
# Read the benchmark input once and close the handle right away. The
# original opened the file four times without ever closing it and relied on
# the locale's default encoding.
with open("wireless.mediawiki", encoding="utf-8") as source_file:
    source_text = source_file.read()

# Run every expansion strategy on a freshly parsed tree, since each one
# modifies the tree it is given.
wikicode = mwparserfromhell.parse(source_text)
expand_1(wikicode)
result_1 = str(wikicode)

wikicode = mwparserfromhell.parse(source_text)
expand_2(wikicode)
result_2 = str(wikicode)

wikicode = mwparserfromhell.parse(source_text)
expand_3(wikicode)
result_3 = str(wikicode)

# All three strategies must produce the same text.
assert result_1 == result_2
assert result_1 == result_3

# Sanity-check the two filter helpers against filter_templates() on an
# unmodified tree.
wikicode = mwparserfromhell.parse(source_text)
#print(wikicode)

indexed_templates = []
for parent, i, node in indexed_ifilter(wikicode, forcetype=mwparserfromhell.nodes.template.Template):
    indexed_templates.append(node)
#    if parent is wikicode:
#        print("<wikicode>", i, node)
#    else:
#        print(parent, i, node)
    # The reported index must address exactly the yielded node.
    assert parent.get(i) is node
assert indexed_templates == wikicode.filter_templates()

parented_templates = []
for parent, node in parented_ifilter(wikicode, forcetype=mwparserfromhell.nodes.template.Template):
    parented_templates.append(node)
#    if parent is wikicode:
#        print("<wikicode>", node)
#    else:
#        print(parent, node)
    # The reported parent must directly contain the yielded node.
    assert parent.index(node) >= 0
assert parented_templates == wikicode.filter_templates()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment