Skip to content

Instantly share code, notes, and snippets.

@jeffh
Created October 18, 2011 19:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffh/1296369 to your computer and use it in GitHub Desktop.
Save jeffh/1296369 to your computer and use it in GitHub Desktop.
HW2 Test Cases
import urllib.request
import sys
from io import BytesIO, StringIO
# The module under test is a student's homework file; it is imported under the
# alias ``hw2`` so the tests below do not depend on the file's actual name.
try:
    import huij as hw2
except ImportError:
    # Without the homework module nothing can be tested: explain and stop
    # with a non-zero exit status.
    print("Could not import homework. Change the import statement on line 7 from 'huij' to your python homework file.")
    raise SystemExit(1)
def mock_urlopen(**kwargs):
    """Return a context manager that replaces ``urllib.request.urlopen``.

    The keyword arguments are forwarded unchanged to ``Mockify`` (it accepts
    ``return_value`` and ``side_effect``).  This is a dependency-free
    stand-in for ``mock.patch.object(urllib.request, 'urlopen')``.
    """
    patched_module = urllib.request
    return Mockify(patched_module, 'urlopen', **kwargs)
def test_crawlURL_single_link():
    # NOTE: deliberately no docstring -- format_testname() would report it
    # instead of the function name.
    #
    # SAMPLE_PAGE2 holds exactly one anchor.  The crawl result is indexed as
    # [0] page size, [1] list of (href, link text), [2] word counts.
    html = SAMPLE_PAGE2
    fake_response = BytesIO(html.encode('utf-8'))
    with mock_urlopen(return_value=fake_response):
        crawled = hw2.crawlURL('http://cs.strose.edu/goldschd2/')

    links = [('http://google.com/index.php', "A bad page")]
    # Only the (lower-cased) words of the link text are expected.
    words = dict(a=1, bad=1, page=1)

    assert crawled[0] == len(html)
    assert crawled[1] == links
    assert crawled[2] == words
def test_crawlURL_with_multiple_links():
    # NOTE: deliberately no docstring -- format_testname() would report it
    # instead of the function name.
    #
    # SAMPLE_PAGE holds two anchors: an absolute URL and a root-relative path.
    html = SAMPLE_PAGE
    with mock_urlopen(return_value=BytesIO(html.encode('utf-8'))):
        crawled = hw2.crawlURL('http://cs.strose.edu/goldschd/')

    links = [
        ('http://google.com/', "Here's a link to google"),
        ('/absolute/relative/', 'a Location'),
    ]

    # Two tokenisations of "Here's" are accepted: split at the apostrophe
    # ("here" + "s") or kept as the single word "here's".
    common_counts = {'a': 2, 'link': 1, 'to': 1, 'google': 1, 'location': 1}
    split_at_apostrophe = dict(common_counts, here=1, s=1)
    apostrophe_kept = dict(common_counts)
    apostrophe_kept["here's"] = 1

    assert crawled[0] == len(html)
    assert crawled[1] == links
    assert crawled[2] == split_at_apostrophe or crawled[2] == apostrophe_kept
def test_crawlURL_various_anchors():
    # NOTE: deliberately no docstring -- format_testname() would report it
    # instead of the function name.
    #
    # Exercises awkward anchor markup: an image-only link, nested tags inside
    # the link text, extra attributes before/after href, whitespace inside the
    # opening tag, newlines in the link text and an unclosed <b>.
    # The fixture is not a raw string, so the \n and \t escapes below become
    # real newline / tab characters in the served page.
    html = """
<a href="/foo/bar/"><img src="Foobar.jpeg" /></a>
<a href="/foo/bar/">Dup<span>licate</span></a>
<a href="foo.php"><i>LOL</i></a><a href="page1.html">Click here</a>
<a href="page2.html" otherstuff="..." color="green">Also here</a>
<a color="blue" otherstuff="..." href="http://cnn.com/another-page.html">Click here</a>
<a\n \tfoo="bar" href="page9001.html">It's over 9000!</a>
<a href="page9999.html">\nParty like \nIt's\n 9999!</a>
<a href="p.php"><b>click here</a>
"""
    fake_response = BytesIO(html.encode('utf-8'))
    with mock_urlopen(return_value=fake_response):
        crawled = hw2.crawlURL('http://google.com')

    # Links are expected in document order, duplicates included, with any
    # markup removed from the link text.
    links = [
        ('/foo/bar/', ''),
        ('/foo/bar/', 'Duplicate'),
        ('foo.php', 'LOL'),
        ('page1.html', 'Click here'),
        ('page2.html', 'Also here'),
        ('http://cnn.com/another-page.html', 'Click here'),
        ('page9001.html', "It's over 9000!"),
        ('page9999.html', "\nParty like \nIt's\n 9999!"),
        ('p.php', 'click here'),
    ]

    assert crawled[0] == len(html)
    assert crawled[1] == links
def test_crawlSite_with_link_cycles():
    # NOTE: deliberately no docstring -- format_testname() would report it
    # instead of the function name.
    #
    # Four pages that link back to each other.  They are handed out in list
    # order, one per urlopen() call; a fifth request would raise IndexError
    # from pop(), which is how an endless crawl would show up.
    site = [
        '''<a href="page1.html">The first page</a>
<a href="mailto:lol@fake.com">FOO</a>
<a href="git://github.com/jeffh/YACS.git">BAR</a>
<a href="page2.html">The second page</a>''', # root
        '<a href="page1.html">The first page</a>', # page 1
        '<a href="page3.html">The third page</a>', # page 2
        '<a href="page2.html">The third page</a>', # page 3
    ]
    # Must be computed up front: serving the pages empties the list.
    total_bytes = sum(map(len, site))

    def next_page(*args, **kwargs):
        body = site.pop(0)
        return BytesIO(body.encode('utf-8'))

    with mock_urlopen(side_effect=next_page):
        crawled = hw2.crawlSite('http://google.com/', politeness=0)

    # The mailto: and git: links are absent from the expected URL list, yet
    # their link text (FOO, BAR) is still part of the expected word counts.
    links = [
        ('http://google.com/page1.html', 'The first page'),
        ('http://google.com/page2.html', 'The second page'),
        ('http://google.com/page3.html', 'The third page'),
    ]
    words = dict(the=5, first=2, second=1, third=2, page=5, foo=1, bar=1)

    assert crawled[0] == total_bytes
    assert crawled[1] == links
    assert crawled[2] == words
def test_analyzeStats():
    # NOTE: deliberately no docstring -- format_testname() would report it
    # instead of the function name.
    #
    # Feeds fixed crawl results to hw2.analyzeStats() and compares everything
    # it prints, line by line, with the expected report.
    total_bytes = 1073
    urls = [
        ('http://cs.strose.edu/page1.html', 'Click here'),
        ('http://cs.strose.edu/page2.html', 'Also here'),
        ('http://cs.strose.edu/page2.html', 'Good stuff here'),
        ('http://cs.strose.edu/longresourcename.html', 'Check this out'),
    ]
    wordcounts = {
        'and': 277,
        'of': 286,
        'the': 251,
        'longestwordfound': 186,
        'goldschmidt': 247,
    }
    # The word section is listed by descending count; 1247 is the sum of all
    # counts in ``wordcounts``.
    expected = """total pages crawled successfully: 3
total words: 1247
URLs and Link-Text:
-------------------
http://cs.strose.edu/page1.html ==> 'Click here'
http://cs.strose.edu/page2.html ==> 'Also here'
http://cs.strose.edu/page2.html ==> 'Good stuff here'
http://cs.strose.edu/longresourcename.html ==> 'Check this out'
Word Counts (total of 1247 words):
----------------------------------
of ==> 286
and ==> 277
the ==> 251
goldschmidt ==> 247
longestwordfound ==> 186
"""
    stdout = StringIO()
    tmp, sys.stdout = sys.stdout, stdout
    try:
        hw2.analyzeStats(total_bytes, urls, wordcounts)
    finally:
        # Always put the previous stream back, even when analyzeStats raises;
        # otherwise every later print would vanish into this buffer.
        sys.stdout = tmp
    assert stdout.getvalue().split('\n') == expected.split('\n')
# HTML fixture served (through the mocked urlopen) to hw2.crawlURL by
# test_crawlURL_with_multiple_links.  It contains two <a> anchors: one with an
# absolute URL and one with a root-relative path.  The <link>/<script> tags in
# <head> are not part of the URLs that test expects.
# The test also compares len(SAMPLE_PAGE) with the reported page size, so do
# not reformat or re-indent the text.
SAMPLE_PAGE = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title> David E. Goldschmidt, Ph.D. </title>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta name="Author" content="David E. Goldschmidt" />
<meta name="Keywords" content="" />
<meta http-equiv="Pragma" content="no-cache" />
<meta http-equiv="Expires" content="-1" />
<link rel="stylesheet" type="text/css" href="cssjs/goldschd.css" />
<link rel="stylesheet" type="text/css" href="cssjs/goldschd-print.css" media="print" />
<script type="text/javascript" src="cssjs/goldschd.js"></script>
</head>
<body>
<div id="all">
<h1> there is no spoon </h1>
<a href="http://google.com/">Here's a link to google</a>
<p>
Blah Blah<a href="/absolute/relative/">a Location</a>LOL
</p>
</div><!-- end all -->
</body>
</html>"""
# Smaller HTML fixture with a single <a> anchor, served to hw2.crawlURL by
# test_crawlURL_single_link.  That test compares len(SAMPLE_PAGE2) with the
# reported page size, so do not reformat or re-indent the text.
SAMPLE_PAGE2 = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<body>
<div id="all">
<h1> there is no spoon </h1>
<a href="http://google.com/index.php">A bad page</a>
</div><!-- end all -->
</body>
</html>"""
#### BEGIN THE MICRO TESTING FRAMEWORK ####
import re
import sys
import traceback
from itertools import zip_longest
from pprint import pformat
class Mockify(object):
    """Tiny replacement for ``mock.patch.object`` usable as a context manager.

    While the ``with`` block is active, attribute ``name`` of ``module`` is
    replaced by this (callable) object; the previous attribute is put back
    when the block exits, whether or not it raised.

    Calling the object:
      * runs ``side_effect(*args, **kwargs)`` and returns its result when a
        side effect was configured (it takes precedence over return_value);
      * otherwise returns ``return_value`` when one was configured (``None``
        is a legitimate configured value);
      * otherwise raises ``TypeError``.
    """

    # Sentinel meaning "argument not supplied"; lets callers pass None.
    NONE = object()

    def __init__(self, module, name, return_value=NONE, side_effect=NONE):
        self._mod, self._name = module, name
        self._return_value = None
        self._return_value_was_set = False
        self._side_effect = None
        self._side_effect_was_set = False
        # Compare the sentinel by identity.  ``!=`` would invoke the
        # argument's own __ne__/__eq__, which may return a wrong or
        # non-boolean answer (or raise) for arbitrary objects.
        if return_value is not self.NONE:
            self.return_value = return_value
        if side_effect is not self.NONE:
            self.side_effect = side_effect

    @property
    def return_value(self):
        """Value returned by a call when no side effect is configured."""
        return self._return_value

    @return_value.setter
    def return_value(self, value):
        self._return_value = value
        # Remember that a value was given so that None can be returned too.
        self._return_value_was_set = True

    @property
    def side_effect(self):
        """Callable invoked with the call's arguments; its result is returned."""
        return self._side_effect

    @side_effect.setter
    def side_effect(self, value):
        self._side_effect = value
        self._side_effect_was_set = True

    def __call__(self, *args, **kwargs):
        if self._side_effect_was_set:
            return self._side_effect(*args, **kwargs)
        if self._return_value_was_set:
            return self.return_value
        raise TypeError("Mockify requires side_effect or return_value to be set")

    def __enter__(self):
        # Keep the real attribute so __exit__ can restore it.
        self._old = getattr(self._mod, self._name)
        setattr(self._mod, self._name, self)
        return self

    def __exit__(self, type, value, traceback):
        # Returns None, so an exception raised inside the block propagates.
        setattr(self._mod, self._name, self._old)
def most_recent_tb(tb):
    """Return the last entry of the traceback chain that starts at ``tb``.

    Follows ``tb_next`` until it runs out, i.e. returns the traceback object
    for the frame in which the exception was actually raised.
    """
    current = tb
    while True:
        following = current.tb_next
        if not following:
            return current
        current = following
# Comparison / boolean operators that split an assert expression into operands.
# An operator is only recognised with a non-word character on either side;
# those two neighbouring characters are consumed by the match.
PARTS = re.compile(r'\W([!=]=|not\W+in|in|[><]=?|or|and)\W')


def extract_parts(assert_line):
    """Split the source text of an ``assert`` statement at its operators.

    The first ``len('assert')`` characters are dropped unconditionally, then
    the remainder is split with ``PARTS``.

    Returns a pair ``(operands, operators)``: the operand snippets in source
    order and the operators found between them.  When no operator is found
    the whole expression is the only operand and the operator list is empty.
    """
    code = assert_line[len('assert'):].strip()
    pieces = PARTS.split(code)
    if len(pieces) <= 1:
        return [code], []
    # re.split with one capture group alternates operand, operator, operand...
    return pieces[0::2], pieces[1::2]
def re_eval(frame):
    """Re-evaluate the operands of the ``assert`` that ``frame`` stopped on.

    The source line the frame is currently executing is looked up through
    ``traceback.extract_stack``.  If it is an ``assert`` statement it is split
    with ``extract_parts`` and every operand is evaluated again in the frame's
    own namespaces, so a failure report can show the values that were
    compared.

    Returns ``(parts, evaled, ops)``: operand source snippets, their values
    (same order) and the operators between them.  All three lists are empty
    when the line is unavailable or is not an ``assert`` -- e.g. the test died
    on a ``raise`` or ``return`` line -- because evaluating such a line again
    would re-run arbitrary code with side effects.  An operand that cannot be
    evaluated is represented by a descriptive placeholder string rather than
    by letting the error escape into the test runner.
    """
    code = traceback.extract_stack(frame)[-1][-1]
    keyword = 'assert'
    # Accept "assert expr" and "assert(expr)", but not names such as
    # "assert_equal(...)" that merely start with the keyword.
    is_assert = bool(code) and code.startswith(keyword) and not (
        code[len(keyword):len(keyword) + 1].isalnum()
        or code[len(keyword):len(keyword) + 1] == '_'
    )
    if not is_assert:
        return [], [], []

    parts, ops = extract_parts(code)
    all_globals = {}
    all_globals.update(frame.f_builtins)
    all_globals.update(frame.f_globals)

    evaled = []
    for p in parts:
        # NOTE: eval() runs text taken from the test's own source file; that
        # is acceptable for this local test runner but must never be pointed
        # at untrusted code.
        try:
            evaled.append(eval(p, all_globals, frame.f_locals))
        except Exception as exc:
            # The operator heuristic can produce fragments that are not valid
            # expressions; report that instead of crashing.
            evaled.append('<could not evaluate %r: %r>' % (p, exc))
    return parts, evaled, ops
def format_testname(func):
    """Return a human-readable title for the test function ``func``.

    A non-empty docstring is used verbatim.  Otherwise the title is the
    function name with its first ``len('test_')`` characters removed and the
    underscores replaced by spaces.
    """
    if func.__doc__:
        return func.__doc__
    bare_name = func.__name__[len('test_'):]
    return bare_name.replace('_', ' ')
def is_test(name, value):
    """Tell whether ``name`` starts with "test", ignoring letter case.

    ``value`` is accepted but not inspected.
    """
    lowered = name.lower()
    return lowered.startswith('test')
def tests_from_dict(vars):
"Returns all test from a given dictionary."
tests = []
for name, value in tuple(vars.items()):
if name.startswith('test_'):
tests.append(value)
return tests
def run_tests(tests, fail_fast=False):
    """Run every callable in ``tests``, print a report and exit the process.

    Each test runs with ``sys.stdout`` redirected to a private buffer; a
    ``.`` (pass) or ``F`` (fail) is written to the real stdout as progress.
    A test fails when it raises an ``Exception``.  For every failure the
    report shows the traceback, the re-evaluated operands of the failing
    ``assert`` (see ``re_eval``) and whatever the test printed.

    Args:
        tests: sequence of zero-argument callables.
        fail_fast: stop after the first failing test.

    Raises:
        SystemExit: always -- status 0 when nothing failed, 1 otherwise.
    """
    print("Running {0} Tests:".format(len(tests)))
    errors = {}
    true_stdout = sys.stdout
    try:
        for test in tests:
            stdout = StringIO()
            sys.stdout = stdout
            try:
                test()
                true_stdout.write('.')
                true_stdout.flush()
            except Exception:
                # Format the traceback first, while the test's exception is
                # the one being handled.
                formatted = traceback.format_exc()
                tb = most_recent_tb(sys.exc_info()[2])
                try:
                    snippets, values, ops = re_eval(tb.tb_frame)
                except Exception:
                    # Operand inspection is best-effort: a failure there must
                    # not hide the test failure or abort the remaining tests.
                    values, ops = [], []
                true_stdout.write('F')
                true_stdout.flush()
                errors[test] = (
                    formatted,
                    stdout.getvalue(),
                    list(zip_longest(ops, values, fillvalue='')),
                )
                if fail_fast:
                    break
    finally:
        # Restore the real stream on every path, including KeyboardInterrupt
        # or SystemExit raised from inside a test.
        sys.stdout = true_stdout

    if not errors:
        print("\n\nNo Errors ^_^")
        sys.exit(0)

    print("\n")
    for test_name, (exc, stdout, tree) in errors.items():
        print("----- {0} - FAILED -----\n\n{1}\n{2}\n".format(
            format_testname(test_name),
            exc,
            '\n'.join(['%s\n%s' % (pformat(val, indent=4), op.strip()) for op, val in tree]),
        ))
        if stdout:
            print(":::STDOUT:::\n{0}".format(stdout))
    print("==== End Errors ====")
    sys.exit(1)
if __name__ == '__main__':
    # Script entry point: run every module-level ``test_*`` object.  Passing
    # -f or --failfast on the command line stops at the first failure.
    run_tests(
        tests_from_dict(globals()),
        fail_fast=not {'-f', '--failfast'}.isdisjoint(sys.argv),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment