mlissner/url_spec_test.py Secret

## url_spec_test.py
import json
import requests
from pathlib import Path
from urllib.parse import unquote, urljoin, urlparse


def load_test_cases(load_cache=True, cache_file=None):
    """Load the URL test cases, preferring a local JSON cache over a download.

    Args:
        load_cache: When true (the default) and the cache file exists, read
            the test data from it instead of downloading it again.
        cache_file: Path of the JSON cache file. Defaults to
            ``test_cache.json`` in the directory containing this script.

    Returns:
        A list containing only the dict entries of the test data; non-dict
        entries (comments in the test file) are dropped.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    if cache_file is None:
        cache_file = Path(__file__).resolve().parent / "test_cache.json"
    else:
        cache_file = Path(cache_file)

    if load_cache and cache_file.exists():
        print("Loading cache file...")
        with open(cache_file, "rb") as f:
            test_cases = json.load(f)
    else:
        print("No cache found or caching disabled. GETting it from Github...")
        url = "https://github.com/web-platform-tests/wpt/raw/master/url/resources/urltestdata.json"
        response = requests.get(url, timeout=2)
        # Fail loudly on an HTTP error rather than parsing (and caching) an
        # error page as if it were test data.
        response.raise_for_status()
        test_cases = response.json()
        print("Saving cache file for future use...")
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(test_cases, f, indent=2)

    # Strip comments from test cases
    return [x for x in test_cases if isinstance(x, dict)]


def normalize_expectations(py_field, expected, got, test):
    """Rewrite a test's expected value into the form ``urlparse`` reports.

    The test data and Python disagree on representation (delimiters,
    percent-encoding, ``None`` vs ``""``), so the raw expectation is adjusted
    before it is compared with what Python returned.

    Args:
        py_field: Name of the ``urlparse`` result attribute being compared.
        expected: Raw expected value taken from the test case.
        got: Value Python actually produced for ``py_field``.
        test: The full test-case dict (read for ``username``/``password``
            when rebuilding the netloc).

    Returns:
        The expectation to compare against ``got``.
    """
    if py_field == "scheme":
        # Tests carry the trailing ':' ('http:'); Python does not. Remove
        # just that one character rather than stripping both ends.
        if expected.endswith(":"):
            expected = expected[:-1]
    elif py_field == "netloc":
        if "@" in got:
            # When user/password is provided, tests expect something like,
            # 'user:pass@foo'. This is also tested in the password field.
            expected = f"{test['username']}:{test['password']}@{expected}"
        if got.endswith(":"):
            expected = expected + ":"
    elif py_field == "path":
        if expected == "/" and got != "/":
            expected = ""
    elif py_field == "query":
        # Drop only the single leading '?'. str.strip("?") would also eat
        # repeated or trailing '?' that are part of the query itself.
        if expected.startswith("?"):
            expected = expected[1:]
        expected = unquote(expected)
    elif py_field == "fragment":
        # Same reasoning as for the query: remove one leading '#', no more.
        if expected.startswith("#"):
            expected = expected[1:]
        expected = unquote(expected)
    elif py_field == "port" and expected != "":
        expected = int(expected)

    # Normalize None vs '' values
    if py_field in ("username", "password", "port") and expected == "":
        if got != "":
            # Python distinguishes between blanks and Nones, but the tests only
            # have blank values. If Python didn't get back a blank, we
            # have to normalize the expectation, but otherwise, we don't.
            expected = None

    return expected


def run_tests(test_cases):
    """Run each test case through ``urljoin``/``urlparse`` and print a report.

    For every case the input is joined with its base (unless the base is
    ``about:blank``), parsed, and each parsed attribute is compared with the
    normalized expectation from the test. Cases flagged ``"failure": True``
    pass only when Python raises ``ValueError``.

    Args:
        test_cases: Iterable of test-case dicts with at least ``input`` and
            ``base`` keys, plus either ``failure`` or the expected fields.

    Returns:
        None. Results and a final success/skip summary are printed.
    """
    print("Running tests...")
    # Maps urlparse result attribute -> key holding the expectation in a test.
    field_mapping = {
        "scheme": "protocol",
        "netloc": "host",
        "path": "pathname",
        "query": "search",
        "fragment": "hash",
        "username": "username",
        "password": "password",
        "port": "port",
    }
    failure_count, success_count, skip_count = 0, 0, 0
    for i, test in enumerate(test_cases):
        try:
            print(f"\nTesting: {i} with input: '{test['input']}'")
        except UnicodeEncodeError:
            # Some of these throw a weird error about 'surrogates not allowed'.
            # I can't figure this out, so warn and punt.
            skip_count += 1
            print(f"\nTesting: {i}.\n  Warn: Got UnicodeEncodeError. Skipping.")
            continue

        failure_expected = test.get("failure") is True

        if test["base"] != "about:blank":
            # include test of test urljoin
            try:
                url = urljoin(test["base"], test["input"])
            except ValueError:
                # Always stop here: there is no joined URL to parse. Whether
                # this is a pass depends on whether the test wanted a failure.
                if failure_expected:
                    success_count += 1
                    print("  ok")
                else:
                    failure_count += 1
                    print("  Fail: urljoin crashed, but shouldn't have.")
                continue
        else:
            url = test["input"]

        try:
            parse_result = urlparse(url)
        except ValueError:
            # Same as above: an expected failure that raises is a success,
            # and either way there is no parse result to inspect.
            if failure_expected:
                success_count += 1
                print("  ok")
            else:
                failure_count += 1
                print("  Fail: urlparse crashed, but shouldn't have.")
            continue

        print(f"  Parsed result: {parse_result}")
        if failure_expected:
            failure_count += 1
            print("  Fail: Did not get expected parse failure")
            continue

        #
        # Check each of the attributes in the
        # tests match those of the parse result
        #
        test_failed = False
        for py_field, test_field in field_mapping.items():
            try:
                # Some attributes are computed on access and can raise.
                got = getattr(parse_result, py_field)
            except ValueError:
                test_failed = True
                print(
                    f"  Fail: Parsing '{py_field}' field crashed, but shouldn't have."
                )
                break
            expected_raw = test[test_field]
            expected = normalize_expectations(py_field, expected_raw, got, test)

            if got != expected:
                test_failed = True
                print(
                    f"  Parsed '{py_field}' does not match '{test_field}' of test:\n"
                    f"         Got: '{got}'\n"
                    f"    Expected: '{expected}'"
                )

        if test_failed:
            failure_count += 1
        else:
            success_count += 1
            print("  ok")

    total_count = success_count + failure_count + skip_count
    print(
        f"\nDone. {success_count}/{total_count} successes. {skip_count} skipped."
    )


if __name__ == "__main__":
    # Script entry point: load the test cases (cache or download), then run
    # them and print the report.
    run_tests(load_test_cases())
	import json
	import requests
	from pathlib import Path
	from urllib.parse import unquote, urljoin, urlparse


	def load_test_cases(load_cache=True, cache_file=None):
	    """Load the URL test cases, preferring a local JSON cache over a download.

	    Args:
	        load_cache: When true (the default) and the cache file exists, read
	            the test data from it instead of downloading it again.
	        cache_file: Path of the JSON cache file. Defaults to
	            ``test_cache.json`` in the directory containing this script.

	    Returns:
	        A list containing only the dict entries of the test data; non-dict
	        entries (comments in the test file) are dropped.

	    Raises:
	        requests.HTTPError: If the download answers with an error status.
	    """
	    if cache_file is None:
	        cache_file = Path(__file__).resolve().parent / "test_cache.json"
	    else:
	        cache_file = Path(cache_file)

	    if load_cache and cache_file.exists():
	        print("Loading cache file...")
	        with open(cache_file, "rb") as f:
	            test_cases = json.load(f)
	    else:
	        print("No cache found or caching disabled. GETting it from Github...")
	        url = "https://github.com/web-platform-tests/wpt/raw/master/url/resources/urltestdata.json"
	        response = requests.get(url, timeout=2)
	        # Fail loudly on an HTTP error rather than parsing (and caching) an
	        # error page as if it were test data.
	        response.raise_for_status()
	        test_cases = response.json()
	        print("Saving cache file for future use...")
	        with open(cache_file, "w", encoding="utf-8") as f:
	            json.dump(test_cases, f, indent=2)

	    # Strip comments from test cases
	    return [x for x in test_cases if isinstance(x, dict)]


	def normalize_expectations(py_field, expected, got, test):
	    """Rewrite a test's expected value into the form ``urlparse`` reports.

	    The test data and Python disagree on representation (delimiters,
	    percent-encoding, ``None`` vs ``""``), so the raw expectation is adjusted
	    before it is compared with what Python returned.

	    Args:
	        py_field: Name of the ``urlparse`` result attribute being compared.
	        expected: Raw expected value taken from the test case.
	        got: Value Python actually produced for ``py_field``.
	        test: The full test-case dict (read for ``username``/``password``
	            when rebuilding the netloc).

	    Returns:
	        The expectation to compare against ``got``.
	    """
	    if py_field == "scheme":
	        # Tests carry the trailing ':' ('http:'); Python does not. Remove
	        # just that one character rather than stripping both ends.
	        if expected.endswith(":"):
	            expected = expected[:-1]
	    elif py_field == "netloc":
	        if "@" in got:
	            # When user/password is provided, tests expect something like,
	            # 'user:pass@foo'. This is also tested in the password field.
	            expected = f"{test['username']}:{test['password']}@{expected}"
	        if got.endswith(":"):
	            expected = expected + ":"
	    elif py_field == "path":
	        if expected == "/" and got != "/":
	            expected = ""
	    elif py_field == "query":
	        # Drop only the single leading '?'. str.strip("?") would also eat
	        # repeated or trailing '?' that are part of the query itself.
	        if expected.startswith("?"):
	            expected = expected[1:]
	        expected = unquote(expected)
	    elif py_field == "fragment":
	        # Same reasoning as for the query: remove one leading '#', no more.
	        if expected.startswith("#"):
	            expected = expected[1:]
	        expected = unquote(expected)
	    elif py_field == "port" and expected != "":
	        expected = int(expected)

	    # Normalize None vs '' values
	    if py_field in ("username", "password", "port") and expected == "":
	        if got != "":
	            # Python distinguishes between blanks and Nones, but the tests only
	            # have blank values. If Python didn't get back a blank, we
	            # have to normalize the expectation, but otherwise, we don't.
	            expected = None

	    return expected


	def run_tests(test_cases):
	    """Run each test case through ``urljoin``/``urlparse`` and print a report.

	    For every case the input is joined with its base (unless the base is
	    ``about:blank``), parsed, and each parsed attribute is compared with the
	    normalized expectation from the test. Cases flagged ``"failure": True``
	    pass only when Python raises ``ValueError``.

	    Args:
	        test_cases: Iterable of test-case dicts with at least ``input`` and
	            ``base`` keys, plus either ``failure`` or the expected fields.

	    Returns:
	        None. Results and a final success/skip summary are printed.
	    """
	    print("Running tests...")
	    # Maps urlparse result attribute -> key holding the expectation in a test.
	    field_mapping = {
	        "scheme": "protocol",
	        "netloc": "host",
	        "path": "pathname",
	        "query": "search",
	        "fragment": "hash",
	        "username": "username",
	        "password": "password",
	        "port": "port",
	    }
	    failure_count, success_count, skip_count = 0, 0, 0
	    for i, test in enumerate(test_cases):
	        try:
	            print(f"\nTesting: {i} with input: '{test['input']}'")
	        except UnicodeEncodeError:
	            # Some of these throw a weird error about 'surrogates not allowed'.
	            # I can't figure this out, so warn and punt.
	            skip_count += 1
	            print(f"\nTesting: {i}.\n Warn: Got UnicodeEncodeError. Skipping.")
	            continue

	        failure_expected = test.get("failure") is True

	        if test["base"] != "about:blank":
	            # include test of test urljoin
	            try:
	                url = urljoin(test["base"], test["input"])
	            except ValueError:
	                # Always stop here: there is no joined URL to parse. Whether
	                # this is a pass depends on whether the test wanted a failure.
	                if failure_expected:
	                    success_count += 1
	                    print(" ok")
	                else:
	                    failure_count += 1
	                    print(" Fail: urljoin crashed, but shouldn't have.")
	                continue
	        else:
	            url = test["input"]

	        try:
	            parse_result = urlparse(url)
	        except ValueError:
	            # Same as above: an expected failure that raises is a success,
	            # and either way there is no parse result to inspect.
	            if failure_expected:
	                success_count += 1
	                print(" ok")
	            else:
	                failure_count += 1
	                print(" Fail: urlparse crashed, but shouldn't have.")
	            continue

	        print(f" Parsed result: {parse_result}")
	        if failure_expected:
	            failure_count += 1
	            print(" Fail: Did not get expected parse failure")
	            continue

	        #
	        # Check each of the attributes in the
	        # tests match those of the parse result
	        #
	        test_failed = False
	        for py_field, test_field in field_mapping.items():
	            try:
	                # Some attributes are computed on access and can raise.
	                got = getattr(parse_result, py_field)
	            except ValueError:
	                test_failed = True
	                print(
	                    f" Fail: Parsing '{py_field}' field crashed, but shouldn't have."
	                )
	                break
	            expected_raw = test[test_field]
	            expected = normalize_expectations(py_field, expected_raw, got, test)

	            if got != expected:
	                test_failed = True
	                print(
	                    f" Parsed '{py_field}' does not match '{test_field}' of test:\n"
	                    f" Got: '{got}'\n"
	                    f" Expected: '{expected}'"
	                )

	        if test_failed:
	            failure_count += 1
	        else:
	            success_count += 1
	            print(" ok")

	    total_count = success_count + failure_count + skip_count
	    print(
	        f"\nDone. {success_count}/{total_count} successes. {skip_count} skipped."
	    )


	if __name__ == "__main__":
	    # Script entry point: load the test cases (cache or download), then run
	    # them and print the report.
	    run_tests(load_test_cases())