Created
September 25, 2021 01:38
-
-
Save swhume/0641934bedc991011a1a589c860cd600 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os | |
import json | |
import logging | |
import argparse | |
class LinkCrawler:
    """SHARE v2.0 link crawler for testing HATEOAS content in API responses.

    Starting from one resource, the crawler requests every queued resource,
    collects each "href" value found in the JSON response, and queues the ones
    it has not requested yet. Non-200 responses are logged as errors; 200
    responses whose "_links" holds a single entry are logged as warnings.
    """

    # Attribute name used to tag the handlers this class adds to the root
    # logger, so creating several crawlers does not duplicate log output.
    _HANDLER_TAG = "_link_crawler_target"

    def __init__(self, base_url, start_resource, log_file="link_log.txt", log_path=None,
                 username=None, password=None, timeout=30):
        """Store the connection settings, set up logging and queue the first resource.

        Args:
            base_url: API base URL that every resource path is appended to.
            start_resource: resource path the crawl starts from.
            log_file: name of the log file.
            log_path: directory for the log file; defaults to this script's directory.
            username: user name for basic auth; no credentials are sent when None.
            password: password for basic auth.
            timeout: seconds to wait on each request before giving up.
        """
        self.user = username
        self.pwd = password
        self.base_url = base_url
        self.resource = start_resource
        self.timeout = timeout
        if log_path is None:
            self.log_path = os.path.dirname(os.path.realpath(__file__))
        else:
            self.log_path = log_path
        self.log_file_name = os.path.join(self.log_path, log_file)
        self.logger = self._setup_logging()
        self.tested_urls = set()
        self.urls = {self.resource}

    def test_links(self):
        """Request every queued resource until the queue is empty, logging problems."""
        # Only send credentials when a user name was supplied; a (None, None)
        # tuple would still be turned into an Authorization header.
        auth = (self.user, self.pwd) if self.user is not None else None
        while self.urls:
            resource = self.urls.pop()
            self.tested_urls.add(resource)
            try:
                r = requests.get(self.base_url + resource, auth=auth, timeout=self.timeout)
            except requests.RequestException as exc:
                # A single unreachable resource must not abort the whole crawl.
                self.logger.error(f"Request failed for {resource}: {exc}")
                continue
            if r.status_code == 200:
                print(f"Good! - {resource}")
                # hack to test for an invalid resource that returns a valid empty set instead of a 404
                warn_msg = self._test_for_empty_response(r, resource)
                if warn_msg:
                    self.logger.warning(warn_msg)
            else:
                self.logger.error(f"{str(r.status_code)} for {resource}")
            self._get_links(r.text, resource)

    def _test_for_empty_response(self, r, resource):
        """Return a warning message when the response looks like an empty set.

        Args:
            r: response object; only its ``text`` attribute is read.
            resource: resource path used in the warning message.

        Returns:
            The warning text, or None when the body is not a JSON object or
            its "_links" entry does not contain exactly one item.
        """
        try:
            api_resp = json.loads(r.text)
        except (TypeError, ValueError):
            # Not JSON at all, so there is nothing to inspect.
            return None
        if not isinstance(api_resp, dict):
            return None
        links = api_resp.get("_links")
        # assumption: when response _links only includes "self" then it is an empty set
        if isinstance(links, (dict, list)) and len(links) == 1:
            return "Warning! Response is empty: " + resource
        return None

    def _get_links(self, content, resource=None):
        """Queue every untested "href" found in a JSON response body.

        Args:
            content: response body text.
            resource: optional resource path, used only in the log message
                written when the body is not valid JSON.
        """
        try:
            content_dict = json.loads(content)
        except (TypeError, ValueError):
            # Error pages are often HTML or empty; skip them instead of crashing.
            where = f" for {resource}" if resource else ""
            self.logger.warning(f"Response body{where} is not valid JSON; no links extracted")
            return
        for url in self._link_finder(content_dict, "href"):
            # Non-string values (e.g. null) cannot be appended to the base URL.
            if isinstance(url, str):
                self.urls.add(url)
        self.urls.difference_update(self.tested_urls)

    def _link_finder(self, json_input, lookup_key):
        """Yield every value stored under ``lookup_key`` in nested dicts and lists.

        A value found under ``lookup_key`` is yielded as-is and not searched further.
        """
        if isinstance(json_input, dict):
            for k, v in json_input.items():
                if k == lookup_key:
                    yield v
                else:
                    yield from self._link_finder(v, lookup_key)
        elif isinstance(json_input, list):
            for item in json_input:
                yield from self._link_finder(item, lookup_key)

    def _setup_logging(self):
        """Attach a file handler and a stderr handler to the root logger and return it.

        The log directory is created when missing, and handlers already added
        for the same target by an earlier crawler are not added again.
        """
        log_file = os.path.abspath(self.log_file_name)
        # FileHandler does not create missing directories itself.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        logger = logging.getLogger()
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        installed = {getattr(handler, self._HANDLER_TAG, None) for handler in logger.handlers}
        if log_file not in installed:
            file_log_handler = logging.FileHandler(log_file)
            setattr(file_log_handler, self._HANDLER_TAG, log_file)
            file_log_handler.setFormatter(formatter)
            logger.addHandler(file_log_handler)
        if "stderr" not in installed:
            stderr_log_handler = logging.StreamHandler()
            setattr(stderr_log_handler, self._HANDLER_TAG, "stderr")
            stderr_log_handler.setFormatter(formatter)
            logger.addHandler(stderr_log_handler)
        return logger
def set_cmd_line_args(log_path_default, start_resource, argv=None):
    """Parse the crawler's command-line options.

    Defaults are set to something convenient to simplify launching.

    Args:
        log_path_default: directory used for --log_dir when the option is omitted.
        start_resource: resource path used for --resource when the option is omitted.
        argv: list of argument strings to parse; None (the default) makes
            argparse read the process command line, as before.

    Returns:
        argparse.Namespace with base_url, start_resource, log_file, log_path,
        username and password attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--base_url", dest="base_url", help="SHARE API base URL",
                        default="https://mdsp-qa.nurocorcloud.com/api")
    parser.add_argument("-r", "--resource", dest="start_resource", help="SHARE API resource",
                        default=start_resource)
    parser.add_argument("-l", "--log_file", help="log file name", default="link_log.txt", dest="log_file")
    parser.add_argument("-d", "--log_dir", help="path to log file directory", default=log_path_default,
                        dest="log_path")
    parser.add_argument("-u", "--user", help="SHARE API username", default="test", dest="username")
    parser.add_argument("-p", "--pwd", help="SHARE API password", default="secret", dest="password")
    # Passing argv through lets callers and tests supply arguments explicitly.
    return parser.parse_args(argv)
def main():
    """Read the command-line options and crawl the links from the start resource."""
    # change data_path and start_resource to suit your tests - or set via command-line
    # os.path.join keeps the default directory valid on every platform; a
    # hard-coded backslash only forms a sub-directory path on Windows.
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
    start_resource = "/mdr/adam/adamig-1-1/datastructures"
    args = set_cmd_line_args(data_path, start_resource)
    # The log file handler cannot be opened inside a directory that does not exist.
    if args.log_path:
        os.makedirs(args.log_path, exist_ok=True)
    # log_file is passed through so the -l/--log_file option actually takes effect.
    ln = LinkCrawler(args.base_url, args.start_resource, log_file=args.log_file, log_path=args.log_path,
                     username=args.username, password=args.password)
    ln.test_links()
# Run the crawler only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment