Created
March 10, 2020 20:47
-
-
Save halfak/d3a9635791f98e0105302b7dfd2ca117 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo script: fetch the current wikitext of one English Wikipedia article and
# time (a) wikitext parsing, (b) section extraction, (c) solving the whole
# `text_complexity` feature list at once, and (d) solving each feature of that
# list on its own.
import time

import mwapi
from revscoring.dependencies import solve
from revscoring.features import wikitext
from articlequality.feature_lists.enwiki import text_complexity

# Cache key under which the raw revision text is injected into every solve().
TEXT_KEY = 'datasource.revision.text'

session = mwapi.Session("https://en.wikipedia.org")
doc = session.get(
    action='query',
    prop='revisions',
    rvprop='content',
    titles='Alan Turing',
    formatversion=2,
)
# The response is indexed as a list of pages, each with a list of revisions;
# take the content of the first revision of the first page.
text = doc['query']['pages'][0]['revisions'][0]['content']

# NOTE(review): each solve() call below gets its own freshly built cache dict,
# exactly as in the original script. Presumably solve() stores intermediate
# results in the dict it is given, so sharing one dict would let later
# measurements reuse earlier work -- confirm before consolidating.

# time.perf_counter() is used instead of time.time() because it is monotonic
# and has the highest available resolution for measuring short durations.
start = time.perf_counter()
solve(wikitext.revision.datasources.wikicode, cache={TEXT_KEY: text})
print("Wikitext parsing took {0} seconds".format(time.perf_counter() - start))

start = time.perf_counter()
sections = solve(wikitext.revision.datasources.sections, cache={TEXT_KEY: text})
print("Parsing {0} sections took {1} seconds".format(
    len(sections), time.perf_counter() - start))

# Solve the whole feature list in a single call.
start = time.perf_counter()
print(list(solve(text_complexity, cache={TEXT_KEY: text})))
print("Processing took {0} seconds".format(time.perf_counter() - start))

# Solve each feature separately to see its individual cost.
for feature in text_complexity:
    start = time.perf_counter()
    print(feature, solve(feature, cache={TEXT_KEY: text}))
    print("Processing took {0} seconds".format(time.perf_counter() - start))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python demo_text_complexity.py
Wikitext parsing took 0.5000169277191162 seconds
Parsing 28 sections took 0.2569730281829834 seconds
[-31.94, -124.75, 58.96, 32.71285714285714, -92.81, 90.9, 64.65285714285714]
Processing took 2.486294746398926 seconds
feature.wikitext.revision.text.flesch -31.94
Processing took 0.0002994537353515625 seconds
feature.wikitext.revisions.sections.min_flesch -124.75
Processing took 0.4263880252838135 seconds
feature.wikitext.revisions.sections.max_flesch 58.96
Processing took 0.36573004722595215 seconds
feature.wikitext.revisions.sections.mean_flesch 32.71285714285714
Processing took 0.4892094135284424 seconds
feature.(wikitext.revisions.sections.min_flesch - wikitext.revision.text.flesch) -92.81
Processing took 0.518779993057251 seconds
feature.(wikitext.revisions.sections.max_flesch - wikitext.revision.text.flesch) 90.9
Processing took 0.3512563705444336 seconds
feature.(wikitext.revisions.sections.mean_flesch - wikitext.revision.text.flesch) 64.65285714285714
Processing took 0.4971330165863037 seconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment