Skip to content

Instantly share code, notes, and snippets.

@ambv
Last active July 17, 2021 11:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ambv/01c2c4ef79515f158dea00ad93498696 to your computer and use it in GitHub Desktop.
Save ambv/01c2c4ef79515f158dea00ad93498696 to your computer and use it in GitHub Desktop.
Use Hypothesis for smoketests of https://github.com/python/cpython/pull/27091
"""
Runs property-based tests (PBTs) on `bytes.count` and `bytes.find`. Also confirms results match Python 3.9 (a `python3.9` executable must be on PATH).
How to use:
- build CPython 3.11 from source
- run ./python.exe -m ensurepip
- run ./python.exe -m pip install hypothesis
- run ./python.exe run_hypo.py # this file
Note: `MAX` value below generates tests that on my machine execute between
2 - 5 minutes. Decrease it if it's too slow for you.
"""
import atexit
from collections import Counter
import os
import subprocess
from tempfile import NamedTemporaryFile
from textwrap import dedent
import unittest
from hypothesis import given, settings, HealthCheck
from hypothesis.strategies import binary
# Size knob for the generated inputs: needles are at most MAX bytes and
# haystacks are between MAX + 1 and 16 * MAX bytes (see the @given calls).
MAX = 256
# Counters keyed by short string labels ('t1', 't2', 't2.count', ...) that
# record how often each test body / branch was executed.
stats: Counter[str] = Counter()
# Short alias used in the @settings(suppress_health_check=...) decorator.
too_slow = HealthCheck.too_slow
def confirm_count_on_python39(
    needle: bytes,
    haystack: bytes,
    count: int,
    found: int,
    interpreter: str = "python3.9",
) -> None:
    """Cross-check `count` and `found` against a reference interpreter.

    A tiny script is written to a temporary file and executed with
    `interpreter`. The script asserts that `haystack.count(needle)` equals
    `count` and that `haystack.find(needle)` equals `found`.

    Args:
        needle: The subsequence that was searched for.
        haystack: The bytes object that was searched.
        count: Result of `haystack.count(needle)` on the interpreter under test.
        found: Result of `haystack.find(needle)` on the interpreter under test.
        interpreter: Executable used as the reference implementation.
            Defaults to ``"python3.9"``, which must then be on PATH.

    Raises:
        subprocess.CalledProcessError: If the reference interpreter exits
            with a non-zero status, i.e. one of the assertions failed.
    """
    content = dedent(
        f"""
        needle = {needle!r}
        haystack = {haystack!r}
        count = {count!r}
        found = {found!r}
        assert haystack.count(needle) == count
        assert haystack.find(needle) == found
        """
    ).lstrip()
    # delete=False: the file has to outlive the `with` block so the child
    # process can open it by name; we remove it ourselves below.
    f = NamedTemporaryFile("w", suffix=".py", delete=False)
    try:
        # Writing happens inside the try so a failed write does not leave
        # the temporary file behind.
        with f:
            f.write(content)
        subprocess.run([interpreter, f.name], check=True)
    finally:
        os.unlink(f.name)
class TestCount(unittest.TestCase):
    """Property-based smoke tests for `bytes.count` and `bytes.find`."""

    @given(binary(max_size=MAX))
    def test_count_of_self_is_one(self, b):
        """A bytes object counts itself exactly once."""
        stats['t1'] += 1
        self.assertEqual(b.count(b), 1)

    @settings(deadline=None, suppress_health_check=[too_slow])
    @given(binary(max_size=MAX), binary(min_size=MAX + 1, max_size=16 * MAX))
    def test_count_doesnt_crash(self, needle, haystack):
        """Search random and guaranteed-present needles in a larger haystack.

        Every positive result is additionally cross-checked against the
        reference interpreter via `confirm_count_on_python39`.
        """
        stats['t2'] += 1

        # Part 1: arbitrary needle, which may or may not occur.
        count = haystack.count(needle)
        self.assertGreaterEqual(count, 0)
        if count:
            stats['t2.count'] += 1
            found = haystack.find(needle)
            self.assertNotEqual(found, -1)
            confirm_count_on_python39(needle, haystack, count, found)

        # The haystack is always strictly longer than the needle (min_size is
        # MAX + 1 versus max_size MAX), so it can never occur inside it.
        self.assertEqual(needle.count(haystack), 0)
        self.assertEqual(needle.find(haystack), -1)

        # Part 2: needles cut out of the middle of the haystack, so a match
        # is guaranteed to exist at offset `mid` or earlier.
        mid = len(haystack) // 2
        for i in range(1, 100, 3):
            sub = haystack[mid:mid+i]
            found = haystack.find(sub)
            # Lower bound is explicit: -1 would otherwise satisfy `<= mid`.
            self.assertGreaterEqual(found, 0)
            self.assertLessEqual(found, mid)
            if found == mid:
                stats['t2.found'] += 1
            sub_count = haystack.count(sub)
            # The slice occurs at least once, namely at `mid`.
            self.assertGreaterEqual(sub_count, 1)
            confirm_count_on_python39(sub, haystack, sub_count, found)
def print_stats() -> None:
    """Print the module-level `stats` counter to standard output."""
    print(stats)
if __name__ == "__main__":
    # Register the report before running: unittest.main() exits the
    # interpreter by default, so the stats are printed from the atexit hook.
    atexit.register(print_stats)
    unittest.main()
@ambv
Copy link
Author

ambv commented Jul 16, 2021

@Zac-HD, any simple ideas how to make those more interesting?

@Zac-HD
Copy link

Zac-HD commented Jul 17, 2021

  • test_count_doesnt_crash will have a very large minimum example size, with len(haystack) >= 157. Probably not worth doing anything about this, but FYI.
  • Maybe restricting your bytestrings to a small alphabet could be interesting, by increasing collisions? e.g. with from_regex(b"[abc]{257,}", fullmatch=True)
  • Driving this with a coverage-guided fuzzer would probably be interesting; I'd suggest Atheris since there's a lot of C code involved. If you want to try HypoFuzz for Python code though just let me know, OSS devs are welcome to a free copy.

Otherwise this looks good to me!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment