-
-
Save chtnnh/d9a103de881dc8fb77e91a739af61d04 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

import mwapi
import textstat
from articlequality.feature_lists.enwiki import text_complexity
from revscoring.datasources.meta import filters
from revscoring.dependencies import solve
from revscoring.features import wikitext
# Demo script: fetch the current wikitext of one English Wikipedia article and
# time (a) textstat's Flesch reading-ease on the raw text, (b) wikitext
# parsing, (c) section extraction and (d) the `text_complexity` feature list,
# first as a whole and then one feature at a time.
#
# All intervals are measured with time.perf_counter(), the clock intended for
# measuring short durations; time.time() is a wall clock and can jump if the
# system time is adjusted mid-run.

# Identify the client to the MediaWiki API.
# TODO(review): append contact details (URL or e-mail) to this string before
# running it regularly, as the Wikimedia User-Agent policy asks.
session = mwapi.Session(
    "https://en.wikipedia.org",
    user_agent="demo_text_complexity (articlequality text complexity timing demo)",
)

# formatversion=2 is requested so that 'pages' can be indexed as a list below.
doc = session.get(action='query', prop='revisions', rvprop='content',
                  titles='Alan Turing', formatversion=2)
text = doc['query']['pages'][0]['revisions'][0]['content']


def _fresh_cache():
    """Return a new cache dict seeded only with the revision text.

    Each solve() call below gets its own dict, exactly like the original
    script, so no call can reuse anything an earlier call may have stored
    in a shared cache and every timing stays independent.
    """
    return {'datasource.revision.text': text}


# Baseline: textstat applied directly to the raw wikitext.
start = time.perf_counter()
text_flesch = textstat.flesch_reading_ease(text)
print("Textstat processing took {0} seconds".format(time.perf_counter() - start))

# Cost of solving the parsed-wikicode datasource on its own.
start = time.perf_counter()
solve(wikitext.revision.datasources.wikicode, cache=_fresh_cache())
print("Wikitext parsing took {0} seconds".format(time.perf_counter() - start))

# Cost of solving the sections datasource.
start = time.perf_counter()
sections = solve(wikitext.revision.datasources.sections, cache=_fresh_cache())
print("Parsing {0} sections took {1} seconds".format(
    len(sections), time.perf_counter() - start))

# All text-complexity features solved together in one call.
start = time.perf_counter()
print(list(solve(text_complexity, cache=_fresh_cache())))
print("Processing took {0} seconds".format(time.perf_counter() - start))

# The same features solved one at a time, each timed separately.
for feature in text_complexity:
    start = time.perf_counter()
    print(feature, solve(feature, cache=_fresh_cache()))
    print("Processing took {0} seconds".format(time.perf_counter() - start))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python demo_text_complexity.py
Textstat processing too 0.5727684497833252 seconds
Wikitext parsing took 0.5000169277191162 seconds
Parsing 28 sections took 0.2569730281829834 seconds
[-31.94, -124.75, 58.96, 32.71285714285714, -92.81, 90.9, 64.65285714285714]
Processing took 2.486294746398926 seconds
feature.wikitext.revision.text.flesch -31.94
Processing took 0.0002994537353515625 seconds
feature.wikitext.revisions.sections.min_flesch -124.75
Processing took 0.4263880252838135 seconds
feature.wikitext.revisions.sections.max_flesch 58.96
Processing took 0.36573004722595215 seconds
feature.wikitext.revisions.sections.mean_flesch 32.71285714285714
Processing took 0.4892094135284424 seconds
feature.(wikitext.revisions.sections.min_flesch - wikitext.revision.text.flesch) -92.81
Processing took 0.518779993057251 seconds
feature.(wikitext.revisions.sections.max_flesch - wikitext.revision.text.flesch) 90.9
Processing took 0.3512563705444336 seconds
feature.(wikitext.revisions.sections.mean_flesch - wikitext.revision.text.flesch) 64.65285714285714
Processing took 0.4971330165863037 seconds
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment