Skip to content

Instantly share code, notes, and snippets.

@micimize
Created August 15, 2019 01:15
Show Gist options
  • Save micimize/96c6768c97501333adbc5a0ab17df999 to your computer and use it in GitHub Desktop.
Save micimize/96c6768c97501333adbc5a0ab17df999 to your computer and use it in GitHub Desktop.
Row-wise inspection for goodtables
import typing as t
from math import inf
from goodtables import Inspector, cells
from goodtables.error import Error
from goodtables.inspector import _filter_checks
from goodtables.registry import registry
from tableschema import Schema
from .rules.full_schema_constraint import (
SCHEMA_CONSTRAINTS_CHECKS,
schema_constraints_checks,
)
class RowInspector(Inspector):
    """Extend the goodtables inspector to allow streaming, row-wise inspection.

    The body-context checks are compiled once at construction time so that
    ``inspect_row`` can validate individual rows without building a full
    table inspection.
    """

    # Table schema whose fields drive both cell construction and validation.
    schema: Schema
    # Every check compiled from the configured checks / skip_checks.
    __compiled_checks: t.List[dict]
    # Subset of the compiled checks selected with context="body".
    _body_checks: t.List[dict]

    def __init__(
        self,
        schema: Schema,
        checks=None,
        skip_checks=None,
        error_limit=inf,
        row_limit=inf,
        *args,
        **kwargs
    ):
        """Configure the base inspector and pre-compile the body checks.

        Args:
            schema: Schema describing the fields of the rows to inspect.
            checks: Checks to run; a falsy value selects
                ``["structure", SCHEMA_CONSTRAINTS_CHECKS]``.
            skip_checks: Checks to skip; a falsy value selects ``[]``.
            error_limit: Forwarded unchanged to ``Inspector.__init__``.
            row_limit: Forwarded unchanged to ``Inspector.__init__``.
            *args: Forwarded unchanged to ``Inspector.__init__``.
            **kwargs: Forwarded unchanged to ``Inspector.__init__``.
        """
        super().__init__(
            checks=checks or ["structure", SCHEMA_CONSTRAINTS_CHECKS],
            skip_checks=skip_checks or [],
            error_limit=error_limit,
            row_limit=row_limit,
            *args,
            **kwargs
        )
        self.schema = schema
        # The ``_Inspector__*`` names are the name-mangled private attributes
        # of the base class; they are read directly here, presumably because
        # Inspector exposes no public accessor for them.
        self.__compiled_checks = registry.compile_checks(
            self._Inspector__checks,
            self._Inspector__skip_checks,
            order_fields=self._Inspector__order_fields,
            infer_fields=self._Inspector__infer_fields,
        )
        self._body_checks = _filter_checks(self.__compiled_checks, context="body")

    def __cheap_cells(self, row_number, row):
        """Build one cell dict per schema field for a single row.

        Args:
            row_number: Stored as-is under ``"row-number"`` in every cell.
            row: Mapping of field name to value (anything with ``.get``);
                fields missing from it get the value ``None``.

        Returns:
            A list of cell dicts in schema field order, with
            ``"column-number"`` being the 1-based field position.
        """
        return [
            {
                "header": field.name,
                "field": field,
                "value": row.get(field.name, None),
                "column-number": index + 1,
                "row-number": row_number,
            }
            for index, field in enumerate(self.schema.fields)
        ]

    def inspect_row(self, row, row_number=None) -> t.List[Error]:
        """Inspect a single row in a stream.

        Args:
            row: Mapping of field name to value for the row.
            row_number: Optional row number recorded in each cell.

        Returns:
            The errors reported by all body checks, in check order; an empty
            list when no check reports anything.
        """
        row_cells = self.__cheap_cells(row_number, row)
        errors: t.List[Error] = []
        for check in self._body_checks:
            # the super Inspector is not actually aware of the schema,
            # but all currently needed checks only require field information
            # A check is either an object exposing ``check_row`` or a plain
            # callable; fall back to calling it directly in the latter case.
            check_func = getattr(check["func"], "check_row", check["func"])
            # Checks may return None instead of an empty list.
            errors += check_func(row_cells) or []
        return errors

    def inspect(self, source, preset=None, **options):
        """Run a regular (non-streaming) inspection using this schema.

        Args:
            source: Forwarded unchanged to ``Inspector.inspect``.
            preset: Forwarded unchanged to ``Inspector.inspect``.
            **options: Extra options forwarded to ``Inspector.inspect``.

        Returns:
            Whatever ``Inspector.inspect`` returns.
        """
        # ``super(Inspector, self)`` would start the lookup *after* Inspector
        # in the MRO and therefore skip Inspector.inspect entirely; the
        # zero-argument form resolves to the direct parent as intended.
        return super().inspect(
            source, preset=preset, schema=self.schema, **options
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment