-
-
Save mlissner/4d2110d7083d74cff3893e261a801515 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
from pathlib import Path | |
from urllib.parse import unquote, urljoin, urlparse | |
def load_test_cases(*, load_cache=True):
    """Return the URL test cases as a list of dicts.

    The cases are read from ``test_cache.json`` next to this file when that
    file exists and ``load_cache`` is true. Otherwise they are downloaded
    from the web-platform-tests repository on GitHub and written to that
    cache file for later runs.

    Args:
        load_cache: When false, ignore any existing cache file and download
            a fresh copy (the cache file is still rewritten afterwards).

    Returns:
        The list of test-case dicts, with every non-dict entry (the JSON
        file's inline comment strings) removed.

    Raises:
        requests.HTTPError: If the download returns an HTTP error status.
    """
    cache_file = Path(__file__).resolve().parent / "test_cache.json"
    if load_cache and cache_file.exists():
        print("Loading cache file...")
        with open(cache_file, "rb") as f:
            test_cases = json.load(f)
    else:
        print("No cache found or caching disabled. GETting it from Github...")
        url = "https://github.com/web-platform-tests/wpt/raw/master/url/resources/urltestdata.json"
        response = requests.get(url, timeout=2)
        # Fail loudly on an HTTP error instead of trying to decode (and then
        # cache) whatever error body came back.
        response.raise_for_status()
        test_cases = response.json()
        print("Saving cache file for future use...")
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(test_cases, f, indent=2)

    # Strip comments from test cases: they are plain strings mixed in with
    # the dict entries.
    return [x for x in test_cases if isinstance(x, dict)]
def normalize_expectations(py_field, expected, got, test):
    """Rewrite a test-suite expectation into the form ``urlparse`` reports.

    The test data and Python's parse result represent the same components
    slightly differently (delimiters kept or dropped, blanks vs ``None``,
    string vs int ports). This adjusts ``expected`` so it can be compared
    to ``got`` with a plain ``!=``.

    Args:
        py_field: Name of the parse-result attribute being compared.
        expected: Raw expected value from the test case (a string).
        got: Value Python produced for ``py_field``.
        test: The whole test-case dict; its ``username`` and ``password``
            entries are read when normalizing ``netloc``.

    Returns:
        The normalized expectation: a string, an ``int`` for a non-blank
        port, or ``None`` for a blank username/password/port that Python
        did not report as ``""``.
    """
    if py_field == "scheme":
        # The test value carries the trailing ':' separator; drop only that
        # one character rather than every colon at either end.
        if expected.endswith(":"):
            expected = expected[:-1]
    elif py_field == "netloc":
        if "@" in got:
            # Python keeps the userinfo inside netloc, while the test's
            # value holds only the host, so prepend 'user:pass@' to match.
            expected = f"{test['username']}:{test['password']}@{expected}"
        if got.endswith(":"):
            # Python keeps a dangling ':' (empty port) in netloc.
            expected = expected + ":"
    elif py_field == "path":
        # The tests report '/' where Python may report an empty path.
        if expected == "/" and got != "/":
            expected = ""
    elif py_field == "query":
        # Remove only the single leading '?' delimiter. str.strip("?") would
        # also eat '?' characters that belong to the query itself
        # (e.g. '??a' or '?a?').
        if expected.startswith("?"):
            expected = expected[1:]
        expected = unquote(expected)
    elif py_field == "fragment":
        # Same reasoning as for the query: drop exactly one leading '#'.
        if expected.startswith("#"):
            expected = expected[1:]
        expected = unquote(expected)
    elif py_field == "port" and expected != "":
        expected = int(expected)

    # Normalize None vs '' values. Python distinguishes between blanks and
    # Nones, but the tests only have blank values. If Python didn't get back
    # a blank, the expectation becomes None; otherwise it is left alone.
    if py_field in ("username", "password", "port") and expected == "" and got != "":
        expected = None
    return expected
def run_tests(test_cases):
    """Run every test case through ``urljoin``/``urlparse`` and report.

    For each case the input is joined onto its base (unless the base is
    ``about:blank``), parsed, and each parsed attribute is compared with
    the matching expectation in the case. Progress and a final tally are
    printed to stdout.

    A case marked ``"failure": True`` passes when ``urljoin`` or
    ``urlparse`` raises ``ValueError`` and fails when parsing succeeds.

    Args:
        test_cases: Iterable of test-case dicts, each with at least
            ``input`` and ``base`` keys, plus either ``failure: True`` or
            the expected component values.
    """
    print("Running tests...")
    # Parse-result attribute name -> key holding the expectation in a test.
    field_mapping = {
        "scheme": "protocol",
        "netloc": "host",
        "path": "pathname",
        "query": "search",
        "fragment": "hash",
        "username": "username",
        "password": "password",
        "port": "port",
    }
    failure_count, success_count, skip_count = 0, 0, 0
    for i, test in enumerate(test_cases):
        try:
            print(f"\nTesting: {i} with input: '{test['input']}'")
        except UnicodeEncodeError:
            # Some inputs cannot be encoded for printing ('surrogates not
            # allowed'); warn and skip them.
            skip_count += 1
            print(f"\nTesting: {i}.\n Warn: Got UnicodeEncodeError. Skipping.")
            continue

        expects_failure = test.get("failure") is True

        if test["base"] != "about:blank":
            # A real base is given, so exercise urljoin as well.
            try:
                url = urljoin(test["base"], test["input"])
            except ValueError:
                if expects_failure:
                    # Rejecting an invalid URL is the correct outcome, so it
                    # must show up in the tally as a success.
                    success_count += 1
                    print(" ok")
                else:
                    failure_count += 1
                    print(" Fail: urljoin crashed, but shouldn't have.")
                continue
        else:
            url = test["input"]

        try:
            parse_result = urlparse(url)
        except ValueError:
            if expects_failure:
                success_count += 1
                print(" ok")
            else:
                failure_count += 1
                print(" Fail: urlparse crashed, but shouldn't have.")
            continue

        print(f" Parsed result: {parse_result}")
        if expects_failure:
            failure_count += 1
            print(" Fail: Did not get expected parse failure")
            continue

        # Check each of the attributes in the tests match those of the
        # parse result.
        test_failed = False
        for py_field, test_field in field_mapping.items():
            try:
                # Attribute access itself can raise ValueError, so it is
                # guarded separately from the comparison.
                got = getattr(parse_result, py_field)
            except ValueError:
                test_failed = True
                print(
                    f" Fail: Parsing '{py_field}' field crashed, but shouldn't have."
                )
                break
            expected_raw = test[test_field]
            expected = normalize_expectations(py_field, expected_raw, got, test)
            if got != expected:
                test_failed = True
                print(
                    f" Parsed '{py_field}' does not match '{test_field}' of test:\n"
                    f" Got: '{got}'\n"
                    f" Expected: '{expected}'"
                )

        if test_failed:
            failure_count += 1
        else:
            success_count += 1
            print(" ok")

    total_count = success_count + failure_count + skip_count
    print(
        f"\nDone. {success_count}/{total_count} successes. {skip_count} skipped."
    )
if __name__ == "__main__":
    # Script entry point: obtain the test cases (from cache or GitHub) and
    # run them straight away.
    run_tests(load_test_cases())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment