Created
October 18, 2011 19:08
-
-
Save jeffh/1296369 to your computer and use it in GitHub Desktop.
HW2 Test Cases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
import sys | |
from io import BytesIO, StringIO | |
# The homework module is imported under the alias ``hw2``; every test below
# reaches the code under test through that name.
try:
    import huij as hw2
except ImportError:
    # Nothing can be tested without the homework module: explain how to point
    # the import at the right file, then exit with a non-zero status.
    print("Could not import homework. Change the import statement on line 7 from 'huij' to your python homework file.")
    sys.exit(1)
def mock_urlopen(**kwargs):
    """Return a ``Mockify`` patch for ``urllib.request.urlopen``.

    All keyword arguments (``return_value`` / ``side_effect``) are forwarded
    to ``Mockify`` unchanged.  The result is meant to be used in a ``with``
    statement, playing the role ``mock.patch.object(urllib.request,
    'urlopen')`` would otherwise play.
    """
    patched_module = urllib.request
    return Mockify(patched_module, 'urlopen', **kwargs)
def test_crawlURL_single_link():
    # No docstring on purpose: format_testname() would display it instead of
    # the name derived from the function.
    # Serve SAMPLE_PAGE2 in place of a real HTTP response.
    fake_response = BytesIO(SAMPLE_PAGE2.encode('utf-8'))
    with mock_urlopen(return_value=fake_response):
        crawled = hw2.crawlURL('http://cs.strose.edu/goldschd2/')

    # The page holds exactly one anchor ...
    expected_urls = [('http://google.com/index.php', "A bad page")]
    # ... and the expected word counts cover that anchor's text only.
    expected_words = dict(a=1, bad=1, page=1)

    assert crawled[0] == len(SAMPLE_PAGE2)
    assert crawled[1] == expected_urls
    assert crawled[2] == expected_words
def test_crawlURL_with_multiple_links():
    # No docstring on purpose: format_testname() would display it instead of
    # the name derived from the function.
    # Serve SAMPLE_PAGE in place of a real HTTP response.
    fake_response = BytesIO(SAMPLE_PAGE.encode('utf-8'))
    with mock_urlopen(return_value=fake_response):
        crawled = hw2.crawlURL('http://cs.strose.edu/goldschd/')

    expected_urls = [
        ('http://google.com/', "Here's a link to google"),
        ('/absolute/relative/', 'a Location'),
    ]

    # Counts that are the same under both accepted tokenizations.
    shared_counts = {'a': 2, 'link': 1, 'to': 1, 'google': 1, 'location': 1}
    # Variant 1: "Here's" is split at the apostrophe into "here" and "s".
    expected_words = dict(shared_counts, here=1, s=1)
    # Variant 2: "here's" is kept as a single word.
    expected_words_alternative = dict(shared_counts)
    expected_words_alternative["here's"] = 1

    assert crawled[0] == len(SAMPLE_PAGE)
    assert crawled[1] == expected_urls
    assert crawled[2] == expected_words or crawled[2] == expected_words_alternative
def test_crawlURL_various_anchors():
    # No docstring on purpose: format_testname() would display it instead of
    # the name derived from the function.
    # Anchors in assorted shapes: nested tags, extra attributes in either
    # order, whitespace inside the tag and inside the text, an unclosed <b>.
    page = """
<a href="/foo/bar/"><img src="Foobar.jpeg" /></a>
<a href="/foo/bar/">Dup<span>licate</span></a>
<a href="foo.php"><i>LOL</i></a><a href="page1.html">Click here</a>
<a href="page2.html" otherstuff="..." color="green">Also here</a>
<a color="blue" otherstuff="..." href="http://cnn.com/another-page.html">Click here</a>
<a\n \tfoo="bar" href="page9001.html">It's over 9000!</a>
<a href="page9999.html">\nParty like \nIt's\n 9999!</a>
<a href="p.php"><b>click here</a>
"""
    fake_response = BytesIO(page.encode('utf-8'))
    with mock_urlopen(return_value=fake_response):
        crawled = hw2.crawlURL('http://google.com')

    # One (href, text) pair per anchor, in document order; inner markup is
    # dropped from the text while whitespace inside the text is kept.
    expected_urls = [
        ('/foo/bar/', ''),
        ('/foo/bar/', 'Duplicate'),
        ('foo.php', 'LOL'),
        ('page1.html', 'Click here'),
        ('page2.html', 'Also here'),
        ('http://cnn.com/another-page.html', 'Click here'),
        ('page9001.html', "It's over 9000!"),
        ('page9999.html', "\nParty like \nIt's\n 9999!"),
        ('p.php', 'click here'),
    ]

    assert crawled[0] == len(page)
    assert crawled[1] == expected_urls
def test_crawlSite_with_link_cycles():
    # No docstring on purpose: format_testname() would display it instead of
    # the name derived from the function.
    root_page = '''<a href="page1.html">The first page</a>
<a href="mailto:lol@fake.com">FOO</a>
<a href="git://github.com/jeffh/YACS.git">BAR</a>
<a href="page2.html">The second page</a>'''
    # Pages are handed out in this order, one per urlopen call.
    site = [
        root_page,  # root
        '<a href="page1.html">The first page</a>',  # page 1
        '<a href="page3.html">The third page</a>',  # page 2
        '<a href="page2.html">The third page</a>',  # page 3
    ]
    # Measured up front because serving a page removes it from ``site``.
    total_bytes = len(''.join(site))

    def next_page(*_args, **_kwargs):
        upcoming = site.pop(0)
        return BytesIO(upcoming.encode('utf-8'))

    with mock_urlopen(side_effect=next_page):
        crawled = hw2.crawlSite('http://google.com/', politeness=0)

    expected_urls = [
        ('http://google.com/page1.html', 'The first page'),
        ('http://google.com/page2.html', 'The second page'),
        ('http://google.com/page3.html', 'The third page'),
    ]
    expected_words = dict(
        the=5,
        first=2,
        second=1,
        third=2,
        page=5,
        foo=1,
        bar=1,
    )

    assert crawled[0] == total_bytes
    assert crawled[1] == expected_urls
    assert crawled[2] == expected_words
def test_analyzeStats():
    # No docstring on purpose: format_testname() would display it instead of
    # the name derived from the function.
    #
    # Feeds fixed crawl results to hw2.analyzeStats and compares what it
    # prints, line by line, with the expected report.
    total_bytes = 1073
    urls = [
        ('http://cs.strose.edu/page1.html', 'Click here'),
        ('http://cs.strose.edu/page2.html', 'Also here'),
        ('http://cs.strose.edu/page2.html', 'Good stuff here'),
        ('http://cs.strose.edu/longresourcename.html', 'Check this out'),
    ]
    wordcounts = {
        'and': 277,
        'of': 286,
        'the': 251,
        'longestwordfound': 186,
        'goldschmidt': 247,
    }
    expected = """total pages crawled successfully: 3
total words: 1247
URLs and Link-Text:
-------------------
http://cs.strose.edu/page1.html ==> 'Click here'
http://cs.strose.edu/page2.html ==> 'Also here'
http://cs.strose.edu/page2.html ==> 'Good stuff here'
http://cs.strose.edu/longresourcename.html ==> 'Check this out'
Word Counts (total of 1247 words):
----------------------------------
of ==> 286
and ==> 277
the ==> 251
goldschmidt ==> 247
longestwordfound ==> 186
"""
    captured = StringIO()
    previous_stdout = sys.stdout
    sys.stdout = captured
    try:
        hw2.analyzeStats(total_bytes, urls, wordcounts)
    finally:
        # Restore stdout even when analyzeStats raises, so later output is
        # not silently written into the capture buffer.
        sys.stdout = previous_stdout
    assert captured.getvalue().split('\n') == expected.split('\n')
# HTML fixture served through the mocked urlopen in
# test_crawlURL_with_multiple_links.  The body contains two <a> anchors; the
# <link> tags in <head> also carry href attributes but are not anchors.
SAMPLE_PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title> David E. Goldschmidt, Ph.D. </title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="Author" content="David E. Goldschmidt" />
<meta name="Keywords" content="" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Expires" content="-1" />
<link rel="stylesheet" type="text/css" href="cssjs/goldschd.css" />
<link rel="stylesheet" type="text/css" href="cssjs/goldschd-print.css" media="print" />
<script type="text/javascript" src="cssjs/goldschd.js"></script>
</head>
<body>
<div id="all">
<h1> there is no spoon </h1>
<a href="http://google.com/">Here's a link to google</a>
<p>
Blah Blah<a href="/absolute/relative/">a Location</a>LOL
</p>
</div><!-- end all -->
</body>
</html>"""
# HTML fixture served through the mocked urlopen in
# test_crawlURL_single_link.  It contains exactly one <a> anchor.
SAMPLE_PAGE2 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<body>
<div id="all">
<h1> there is no spoon </h1>
<a href="http://google.com/index.php">A bad page</a>
</div><!-- end all -->
</body>
</html>"""
#### BEGIN THE MICRO TESTING FRAMEWORK #### | |
import re | |
import sys | |
import traceback | |
from itertools import zip_longest | |
from pprint import pformat | |
class Mockify(object):
    """Tiny replacement for ``mock.patch.object`` used as a context manager.

    On entering the ``with`` block the attribute ``name`` of ``module`` is
    replaced by this object; on leaving it the previous attribute is put
    back.  While patched, calling the attribute calls this object:

    * if ``side_effect`` was supplied it is called with the same arguments
      and its result is returned;
    * otherwise, if ``return_value`` was supplied, that value is returned;
    * otherwise ``TypeError`` is raised.
    """

    # Sentinel meaning "argument not supplied", so that ``None`` can be
    # passed as a real return value or side effect.
    NONE = object()

    def __init__(self, module, name, return_value=NONE, side_effect=NONE):
        self._mod, self._name = module, name
        self._return_value = None
        self._return_value_was_set = False
        self._side_effect = None
        self._side_effect_was_set = False
        # Compare by identity: ``!=`` would invoke the value's own __ne__,
        # which may return a falsy or non-boolean result and make a supplied
        # value look as if it had been omitted.
        if return_value is not self.NONE:
            self.return_value = return_value
        if side_effect is not self.NONE:
            self.side_effect = side_effect

    @property
    def return_value(self):
        """Value returned by calls when no side effect is configured."""
        return self._return_value

    @return_value.setter
    def return_value(self, value):
        self._return_value = value
        self._return_value_was_set = True

    @property
    def side_effect(self):
        """Callable that handles calls; takes priority over ``return_value``."""
        return self._side_effect

    @side_effect.setter
    def side_effect(self, value):
        self._side_effect = value
        self._side_effect_was_set = True

    def __call__(self, *args, **kwargs):
        if self._side_effect_was_set:
            return self._side_effect(*args, **kwargs)
        if self._return_value_was_set:
            return self.return_value
        raise TypeError("Mockify requires side_effect or return_value to be set")

    def __enter__(self):
        # Remember the real attribute so __exit__ can restore it.
        self._old = getattr(self._mod, self._name)
        setattr(self._mod, self._name, self)
        return self

    def __exit__(self, type, value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        setattr(self._mod, self._name, self._old)
def most_recent_tb(tb):
    """Follow the ``tb_next`` links from ``tb`` and return the last entry.

    ``tb`` itself is returned when it has no successor.
    """
    cursor = tb
    while True:
        following = cursor.tb_next
        if not following:
            return cursor
        cursor = following
# Matches one comparison / membership / boolean operator together with the
# single non-word character on each side of it; only the operator is captured.
PARTS = re.compile(r'\W([!=]=|not\W+in|in|[><]=?|or|and)\W')


def extract_parts(assert_line):
    """Split the source text of an ``assert`` line into operands and operators.

    The first ``len('assert')`` characters are dropped unconditionally, then
    the rest is split on ``PARTS``.  Returns ``(operands, operators)`` as two
    lists; when no operator is found the whole expression is the only operand
    and the operator list is empty.
    """
    expression = assert_line[len('assert'):].strip()
    pieces = PARTS.split(expression)
    if len(pieces) <= 1:
        return [expression], []
    # re.split with a capturing group alternates operand, operator, operand...
    return pieces[0::2], pieces[1::2]
def re_eval(frame):
    """Re-evaluate the operands of the ``assert`` statement ``frame`` stopped on.

    The source line the frame is currently on is split with
    ``extract_parts`` and every operand is evaluated in the frame's own
    builtins, globals and locals.

    Returns ``(operands, values, operators)``.  When the line is unavailable
    or is not an ``assert`` statement (for example the exception came from
    ordinary code rather than a failed assertion) three empty lists are
    returned.  An operand that cannot be evaluated is reported as a
    placeholder string instead of raising, so callers that are already
    handling a test failure are not interrupted by a second error.
    """
    code = traceback.extract_stack(frame)[-1][-1]
    stripped = code.strip() if code else ''
    # ``\b`` keeps names such as ``assertEqual`` or ``assert_ok`` from being
    # mistaken for the ``assert`` keyword.
    if not re.match(r'assert\b', stripped):
        return [], [], []
    parts, ops = extract_parts(stripped)
    all_globals = {}
    all_globals.update(frame.f_builtins)
    all_globals.update(frame.f_globals)
    evaled = []
    for p in parts:
        try:
            # NOTE(review): eval() runs text taken from the test file itself;
            # never feed it source that is not trusted.
            evaled.append(eval(p, all_globals, frame.f_locals))
        except Exception as exc:
            evaled.append('<could not evaluate {0!r}: {1!r}>'.format(p, exc))
    return parts, evaled, ops
def format_testname(func):
    """Return the display name of a test.

    A non-empty docstring is used as is; otherwise the function name with
    its first ``len('test_')`` characters removed and underscores turned
    into spaces.
    """
    doc = func.__doc__
    if doc:
        return doc
    bare_name = func.__name__[len('test_'):]
    return bare_name.replace('_', ' ')
def is_test(name, value):
    """Tell whether ``name`` begins with "test", ignoring case.

    ``value`` is accepted but not inspected.
    """
    prefix = 'test'
    return name.lower()[:len(prefix)] == prefix
def tests_from_dict(vars): | |
"Returns all test from a given dictionary." | |
tests = [] | |
for name, value in tuple(vars.items()): | |
if name.startswith('test_'): | |
tests.append(value) | |
return tests | |
def run_tests(tests, fail_fast=False):
    """Run every callable in ``tests`` and print a report.

    Each test runs with ``sys.stdout`` redirected to a private buffer; a
    ``.`` or ``F`` progress mark is written to the real stdout per test.
    A test fails when it raises ``Exception`` (including
    ``AssertionError``).  For each failure the traceback, the re-evaluated
    operands of the failing assert (when available) and the captured output
    are printed.

    ``fail_fast`` stops the run after the first failure.

    This function does not return: it calls ``sys.exit(0)`` when nothing
    failed and ``sys.exit(1)`` otherwise.
    """
    print("Running {0} Tests:".format(len(tests)))
    errors = {}
    true_stdout = sys.stdout
    try:
        for test in tests:
            stdout = StringIO()
            sys.stdout = stdout
            try:
                test()
            except Exception:
                formatted = traceback.format_exc()
                tb = most_recent_tb(sys.exc_info()[2])
                try:
                    _, values, ops = re_eval(tb.tb_frame)
                except Exception:
                    # The innermost frame may not be on a simple assert line;
                    # report the failure without operand details rather than
                    # aborting the whole run.
                    values, ops = [], []
                true_stdout.write('F')
                true_stdout.flush()
                errors[test] = (
                    formatted,
                    stdout.getvalue(),
                    list(zip_longest(ops, values, fillvalue='')),
                )
                if fail_fast:
                    break
            else:
                true_stdout.write('.')
                true_stdout.flush()
    finally:
        # Always give stdout back, even if a test raises something that is
        # not an ``Exception`` (SystemExit, KeyboardInterrupt).
        sys.stdout = true_stdout
    if not errors:
        print("\n\nNo Errors ^_^")
        sys.exit(0)
    print("\n")
    for test_name, (exc, stdout, tree) in errors.items():
        # Each value is printed followed by the operator that came after it.
        details = '\n'.join(
            '%s\n%s' % (pformat(val, indent=4), op.strip()) for op, val in tree
        )
        print("----- {0} - FAILED -----\n\n{1}\n{2}\n".format(
            format_testname(test_name),
            exc,
            details,
        ))
        if stdout:
            print(":::STDOUT:::\n{0}".format(stdout))
    print("==== End Errors ====")
    sys.exit(1)
if __name__ == '__main__':
    # Stop at the first failure when -f or --failfast is on the command line.
    stop_early = any(flag in sys.argv for flag in ('-f', '--failfast'))
    discovered = tests_from_dict(globals())
    run_tests(discovered, fail_fast=stop_early)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment