Created
August 15, 2019 01:15
-
-
Save micimize/96c6768c97501333adbc5a0ab17df999 to your computer and use it in GitHub Desktop.
Row-wise inspection for goodtables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typing as t | |
from math import inf | |
from goodtables import Inspector, cells | |
from goodtables.error import Error | |
from goodtables.inspector import _filter_checks | |
from goodtables.registry import registry | |
from tableschema import Schema | |
from .rules.full_schema_constraint import ( | |
SCHEMA_CONSTRAINTS_CHECKS, | |
schema_constraints_checks, | |
) | |
class RowInspector(Inspector):
    """Extend the goodtables ``Inspector`` to allow streaming, row-wise inspection.

    In addition to the regular ``inspect`` entry point, the checks are
    compiled once at construction time so that ``inspect_row`` can run the
    body-context checks against individual rows as they arrive.
    """

    # Schema whose fields drive the per-row cell construction.
    schema: Schema
    # Checks compiled by the goodtables registry for this inspector's settings.
    __compiled_checks: t.List[dict]
    # Subset of the compiled checks whose context is "body".
    _body_checks: t.List[dict]

    def __init__(
        self,
        schema: Schema,
        checks=None,
        skip_checks=None,
        error_limit=inf,
        row_limit=inf,
        *args,
        **kwargs
    ):
        """Configure the inspector and precompile the body checks.

        Args:
            schema: Schema whose fields describe the rows to inspect.
            checks: Checks to run; falls back to ``"structure"`` plus
                ``SCHEMA_CONSTRAINTS_CHECKS`` when falsy.
            skip_checks: Checks to skip; falls back to an empty list when falsy.
            error_limit: Forwarded to the base ``Inspector``.
            row_limit: Forwarded to the base ``Inspector``.
            *args: Forwarded to the base ``Inspector``.
            **kwargs: Forwarded to the base ``Inspector``.
        """
        super().__init__(
            checks=checks or ["structure", SCHEMA_CONSTRAINTS_CHECKS],
            skip_checks=skip_checks or [],
            error_limit=error_limit,
            row_limit=row_limit,
            *args,
            **kwargs
        )
        self.schema = schema
        # The base class keeps its settings in name-mangled private
        # attributes, so they are read back through their mangled names.
        self.__compiled_checks = registry.compile_checks(
            self._Inspector__checks,
            self._Inspector__skip_checks,
            order_fields=self._Inspector__order_fields,
            infer_fields=self._Inspector__infer_fields,
        )
        self._body_checks = _filter_checks(self.__compiled_checks, context="body")

    def __cheap_cells(self, row_number, row):
        """Build one cell dict per schema field for ``row``.

        ``row`` is looked up by field name via ``row.get``; fields that are
        absent from the row yield a cell whose ``"value"`` is ``None``.
        Column numbers are 1-based and follow the schema's field order.
        """
        return [
            {
                "header": field.name,
                "field": field,
                "value": row.get(field.name, None),
                "column-number": index + 1,
                "row-number": row_number,
            }
            for index, field in enumerate(self.schema.fields)
        ]

    def inspect_row(self, row, row_number=None) -> t.List[Error]:
        """Inspect a single row in a stream.

        Args:
            row: Mapping from field name to value (accessed with ``.get``).
            row_number: Row number recorded on every generated cell.

        Returns:
            The errors reported by all body checks, concatenated in check order.
        """
        row_cells = self.__cheap_cells(row_number, row)
        errors: t.List[Error] = []
        for check in self._body_checks:
            # the super Inspector is not actually aware of the schema,
            # but all currently needed checks only require field information
            #
            # Use the check's ``check_row`` attribute when it has one;
            # otherwise call the check object itself.
            check_func = getattr(check["func"], "check_row", check["func"])
            errors += check_func(row_cells) or []
        return errors

    def inspect(self, source, preset=None, **options):
        """Run the base inspection on ``source`` using this inspector's schema.

        Args:
            source: Source handed to the base ``Inspector.inspect``.
            preset: Preset forwarded to the base ``Inspector.inspect``.
            **options: Extra options forwarded to the base implementation.

        Returns:
            Whatever the base ``Inspector.inspect`` returns.
        """
        # ``super()`` (not ``super(Inspector, self)``) so the lookup starts at
        # ``Inspector``; the two-argument form skipped it and landed on
        # ``object``, which has no ``inspect`` method.
        return super().inspect(
            source, preset=preset, schema=self.schema, **options
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment