Skip to content

Instantly share code, notes, and snippets.

@rennerocha
Created June 4, 2020 20:00
Show Gist options
  • Save rennerocha/4debfcc957dde04259b200df8ee19a19 to your computer and use it in GitHub Desktop.
Save rennerocha/4debfcc957dde04259b200df8ee19a19 to your computer and use it in GitHub Desktop.
import re
from collections import namedtuple
from spidermon import Monitor, MonitorSuite, monitors
# Result type shared by every (recursive) call instead of being rebuilt each time.
CoverageResult = namedtuple("CoverageResult", ("status", "failures"))


def check_coverage(expected, current, parent_field=None):
    """Compare measured field coverage against the expected minimums.

    Args:
        expected: Mapping of field name to the minimum coverage required.
            A nested mapping describes the sub-fields of a nested field.
        current: Mapping of field name to the measured coverage, nested the
            same way as ``expected``. Missing fields count as 0.
        parent_field: Slash-joined path of the enclosing field; used to
            build the reported name of nested fields.

    Returns:
        A ``CoverageResult`` whose ``status`` is ``"SUCCESS"`` or ``"FAIL"``
        and whose ``failures`` is a list of dicts with the keys ``name``,
        ``expected`` and ``current`` (one per field below its minimum).
    """
    failures = []
    for field, value in expected.items():
        # Quote names containing "/" so they stay distinguishable from the
        # "/" used as the path separator for nested fields.
        field_name = '"{}"'.format(field) if "/" in field else field
        if parent_field:
            field_name = "/".join([parent_field, field_name])

        if isinstance(value, dict):
            nested_current = current.get(field)
            if not isinstance(nested_current, dict):
                # The field is missing or is not a nested group: report every
                # expected sub-field as uncovered instead of crashing.
                nested_current = {}
            nested_result = check_coverage(value, nested_current, field_name)
            failures.extend(nested_result.failures)
            continue

        current_value = current.get(field, 0)
        if current_value is None or isinstance(current_value, dict):
            # Shape mismatch (a group or nothing where a number is expected):
            # treat it as no coverage rather than raising TypeError below.
            current_value = 0
        if current_value < value:
            failures.append(
                {"name": field_name, "expected": value, "current": current_value}
            )

    status = "SUCCESS" if not failures else "FAIL"
    return CoverageResult(status=status, failures=failures)
# Matches a plain decimal number such as "51", "51.1" or ".5".
_COVERAGE_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?|\.\d+")


def clean_field_coverage_stats(field_coverage):
    """Convert coverage strings such as ``"51.1%"`` to floats, in place.

    Nested dicts are processed recursively. Values that are already numeric
    are kept (as floats), so calling this again on an already-cleaned dict is
    safe.

    Args:
        field_coverage: Possibly nested mapping of field name to a coverage
            value (a string containing a number, or a number).

    Returns:
        The same ``field_coverage`` object, with every leaf replaced by a
        float.

    Raises:
        ValueError: If a string value does not contain a number.
    """
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for field, value in field_coverage.items():
        if isinstance(value, dict):
            clean_field_coverage_stats(value)
            continue
        if isinstance(value, (int, float)):
            # Already cleaned by an earlier call over the same dict.
            field_coverage[field] = float(value)
            continue
        match = _COVERAGE_NUMBER_RE.search(value)
        if match is None:
            raise ValueError(
                "No numeric coverage value found for field {!r}: {!r}".format(
                    field, value
                )
            )
        field_coverage[field] = float(match.group())
    return field_coverage
# Minimum coverage required per field; a nested dict lists the thresholds for
# the sub-fields of a nested field. The monitor below reads the setting of the
# same name from the spider settings.
SPIDERMON_FIELD_COVERAGE = {
    "title": 100,
    "product_information": {
        "availability": 90,
    },
}

# Example of the "fields_coverage" stats entry that gets checked:
#
# 'fields_coverage': {'product_information': {'availability': '51.1%',
#                                             'number of reviews': '49.0%',
#                                             'price (excl. tax)': '51.0%',
#                                             'price (incl. tax)': '50.0%',
#                                             'product type': '49.7%',
#                                             'tax': '49.9%',
#                                             'upc': '52.3%'},
#                     'title': '49.9%',
#                     'url': '100.0%'},
# Monitor that fails when any field's coverage, taken from the
# "fields_coverage" stats entry, is below the minimum configured in the
# SPIDERMON_FIELD_COVERAGE setting.
# NOTE: documented with comments rather than docstrings so the descriptions
# reported for the monitor are not altered.
@monitors.name("Field coverage")
class FieldCoverageMonitor(Monitor):
    @monitors.name("Items has correct field coverage")
    def test_items_field_coverage(self):
        expected_field_coverage = self.data["spider"].settings.getdict(
            "SPIDERMON_FIELD_COVERAGE"
        )
        # The stats hold strings such as "51.1%"; convert them to floats
        # before comparing against the numeric thresholds.
        field_coverage = clean_field_coverage_stats(
            self.data["stats"]["fields_coverage"]
        )
        coverage_results = check_coverage(expected_field_coverage, field_coverage)

        # List every field below its threshold so a failure is actionable.
        failure_details = ", ".join(
            "{} (expected {}, current {})".format(
                failure["name"], failure["expected"], failure["current"]
            )
            for failure in coverage_results.failures
        )
        self.assertTrue(
            coverage_results.status == "SUCCESS",
            msg="Field coverage below expected minimum: {}".format(failure_details),
        )
# Monitor suite that bundles the field coverage monitor.
# NOTE(review): presumably registered to run when the spider closes (see the
# class name) - confirm against the project's Spidermon settings.
class SpiderCloseMonitorSuite(MonitorSuite):
    monitors = [FieldCoverageMonitor]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment