diff --git a/data/invalid/datapackage_no_foreign_key.json b/data/invalid/datapackage_no_foreign_key.json new file mode 100644 index 0000000000..67be839f6d --- /dev/null +++ b/data/invalid/datapackage_no_foreign_key.json @@ -0,0 +1,49 @@ +{ + "name": "testing", + "resources": [ + { + "name": "data", + "path": "data.csv", + "schema": { + "fields": [ + { + "name": "id", + "type": "string", + "constraints": { + "required": true + } + }, + { + "name": "name", + "type": "string" + }, + { + "name": "description", + "type": "string" + }, + { + "name": "amount", + "type": "number" + } + ], + "primaryKey": "id" + } + }, + { + "name": "data2", + "path": "data2.csv", + "schema": { + "fields": [ + { + "type": "string", + "name": "parent" + }, + { + "type": "string", + "name": "comment" + } + ] + } + } + ] +} diff --git a/frictionless/__init__.py b/frictionless/__init__.py index 163a0df895..a086d97455 100644 --- a/frictionless/__init__.py +++ b/frictionless/__init__.py @@ -40,4 +40,6 @@ from .table import Lookup as Lookup from .table import Row as Row from .transformer import Transformer as Transformer + +# Deprecated from .validator import Validator as Validator diff --git a/frictionless/detector/detector.py b/frictionless/detector/detector.py index fc3e47e0a1..cd771923b4 100644 --- a/frictionless/detector/detector.py +++ b/frictionless/detector/detector.py @@ -9,7 +9,6 @@ from .. import helpers, settings from ..dialect import Dialect -from ..exception import FrictionlessException from ..fields import AnyField from ..metadata import Metadata from ..platform import platform @@ -403,33 +402,6 @@ def detect_schema( fields[index] = AnyField(name=name, schema=schema) # type: ignore schema.fields = fields # type: ignore - # Sync schema - if self.schema_sync: - if labels: - case_sensitive = options["header_case"] - - if not case_sensitive: - labels = [label.lower() for label in labels] - - if len(labels) != len(set(labels)): - note = '"schema_sync" requires unique labels in the header' - raise FrictionlessException(note) - - mapped_fields = self.mapped_schema_fields_names( - schema.fields, # type: ignore - case_sensitive, - ) - - self.rearrange_schema_fields_given_labels( - mapped_fields, - schema, - labels, - ) - - self.add_missing_required_labels_to_schema_fields( - mapped_fields, schema, labels, case_sensitive - ) - # Patch schema if self.schema_patch: patch = deepcopy(self.schema_patch) @@ -443,57 +415,3 @@ def detect_schema( schema = Schema.from_descriptor(descriptor) return schema - - @staticmethod - def mapped_schema_fields_names( - fields: List[Field], case_sensitive: bool - ) -> Dict[str, Field]: - """Create a dictionnary to map field names with schema fields""" - if case_sensitive: - return {field.name: field for field in fields} - else: - return {field.name.lower(): field for field in fields} - - @staticmethod - def rearrange_schema_fields_given_labels( - fields_mapping: Dict[str, Field], - schema: Schema, - labels: List[str], - ): - """Rearrange fields according to the order of labels. All fields - missing from labels are dropped""" - schema.clear_fields() - - for name in labels: - default_field = Field.from_descriptor({"name": name, "type": "any"}) - field = fields_mapping.get(name, default_field) - schema.add_field(field) - - def add_missing_required_labels_to_schema_fields( - self, - fields_mapping: Dict[str, Field], - schema: Schema, - labels: List[str], - case_sensitive: bool, - ): - """This method aims to add missing required labels and - primary key field not in labels to schema fields. 
- """ - for name, field in fields_mapping.items(): - if ( - self.field_is_required(field, schema, case_sensitive) - and name not in labels - ): - schema.add_field(field) - - @staticmethod - def field_is_required( - field: Field, - schema: Schema, - case_sensitive: bool, - ) -> bool: - if case_sensitive: - return field.required or field.name in schema.primary_key - else: - lower_primary_key = [pk.lower() for pk in schema.primary_key] - return field.required or field.name.lower() in lower_primary_key diff --git a/frictionless/validator/__spec__/package/test_general.py b/frictionless/package/__spec__/test_validate.py similarity index 52% rename from frictionless/validator/__spec__/package/test_general.py rename to frictionless/package/__spec__/test_validate.py index 485abbdac0..fdb1121508 100644 --- a/frictionless/validator/__spec__/package/test_general.py +++ b/frictionless/package/__spec__/test_validate.py @@ -1,5 +1,6 @@ import json import pathlib +from copy import deepcopy import pytest @@ -11,6 +12,7 @@ Resource, Schema, fields, + platform, ) # General @@ -302,7 +304,10 @@ def test_validate_package_using_detector_schema_sync_issue_847(): Resource( data=[["f1"], ["v1"], ["v2"], ["v3"]], schema=Schema( - fields=[fields.StringField(name="f1"), fields.StringField(name="f2")], + fields=[ + fields.StringField(name="f1"), + fields.StringField(name="f2"), + ], ), ), ] @@ -362,3 +367,313 @@ def test_package_licenses_required_path_or_name_issue_1290(): descriptor = {"resources": [], "licenses": [{"title": "title"}]} report = Package.validate_descriptor(descriptor) assert report.errors[0].note.count('license requires "path" or "name"') + + +def test_package_validate_with_skip_errors(): + ## Test runs on data with two blank-row errors, one primary-key error, see + # first test case + test_cases = [ + {"ignore": [], "expect_errors": ["blank-row", "primary-key", "blank-row"]}, + {"ignore": ["primary-key"], "expect_errors": ["blank-row", "blank-row"]}, + {"ignore": ["blank-row"], "expect_errors": ["primary-key"]}, + {"ignore": ["blank-row", "primary-key"], "expect_errors": []}, + ] + + for tc in test_cases: + with open("data/invalid/datapackage.json") as file: + package = Package(json.load(file), basepath="data/invalid") + checklist = Checklist(skip_errors=tc["ignore"]) + + report = package.validate(checklist) + + assert report.flatten(["type"]) == [[t] for t in tc["expect_errors"]] + + +# Stats + +DESCRIPTOR_SH = { + "resources": [ + { + "name": "resource1", + "path": "data/table.csv", + "hash": "sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8", + "bytes": 30, + } + ] +} + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_package_validate_stats(): + source = deepcopy(DESCRIPTOR_SH) + package = Package(source) + report = package.validate() + assert report.valid + + +def test_package_validate_stats_invalid(): + source = deepcopy(DESCRIPTOR_SH) + source["resources"][0]["hash"] += "a" + source["resources"][0]["bytes"] += 1 + package = Package(source) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, None, "hash-count"], + [None, None, "byte-count"], + ] + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_package_validate_stats_size(): + source = deepcopy(DESCRIPTOR_SH) + source["resources"][0].pop("hash") + package = Package(source) + report = package.validate() + assert report.valid + + +def test_package_validate_stats_size_invalid(): + source = 
deepcopy(DESCRIPTOR_SH) + source["resources"][0]["bytes"] += 1 + source["resources"][0].pop("hash") + package = Package(source) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, None, "byte-count"], + ] + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_package_validate_stats_hash(): + source = deepcopy(DESCRIPTOR_SH) + source["resources"][0].pop("bytes") + package = Package(source) + report = package.validate() + assert report.valid + + +def test_package_validate_check_file_package_stats_hash_invalid(): + source = deepcopy(DESCRIPTOR_SH) + source["resources"][0].pop("bytes") + source["resources"][0]["hash"] += "a" + package = Package(source) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, None, "hash-count"], + ] + + +# Schema + +DESCRIPTOR_FK = { + "resources": [ + { + "name": "cities", + "data": [ + ["id", "name", "next_id"], + [1, "london", 2], + [2, "paris", 3], + [3, "rome", 4], + [4, "rio", None], + ], + "schema": { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + {"name": "next_id", "type": "integer"}, + ], + "foreignKeys": [ + { + "fields": "next_id", + "reference": {"resource": "", "fields": "id"}, + }, + { + "fields": "id", + "reference": {"resource": "people", "fields": "label"}, + }, + ], + }, + }, + { + "name": "people", + "data": [["label", "population"], [1, 8], [2, 2], [3, 3], [4, 6]], + }, + ], +} + +MULTI_FK_RESSOURCE = { + "name": "travel_time", + "data": [["from", "to", "hours"], [1, 2, 1.5], [2, 3, 8], [3, 4, 18]], + "schema": { + "fields": [ + {"name": "from", "type": "integer"}, + {"name": "to", "type": "integer"}, + {"name": "hours", "type": "number"}, + ], + "foreignKeys": [ + { + "fields": ["from", "to"], + "reference": {"resource": "cities", "fields": ["id", "next_id"]}, + } + ], + }, +} + + +def test_package_validate_schema_foreign_key_error(): + descriptor = deepcopy(DESCRIPTOR_FK) + package = Package(descriptor) + report = package.validate() + assert report.valid + + +def test_package_validate_schema_foreign_key_not_defined(): + descriptor = deepcopy(DESCRIPTOR_FK) + del descriptor["resources"][0]["schema"]["foreignKeys"] + package = Package(descriptor) + report = package.validate() + assert report.valid + + +def test_package_validate_schema_foreign_key_self_referenced_resource_violation(): + descriptor = deepcopy(DESCRIPTOR_FK) + del descriptor["resources"][0]["data"][4] + package = Package(descriptor) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ + [4, None, "foreign-key", ["3", "rome", "4"]], + ] + + +def test_package_validate_schema_foreign_key_internal_resource_violation(): + descriptor = deepcopy(DESCRIPTOR_FK) + del descriptor["resources"][1]["data"][4] + package = Package(descriptor) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ + [5, None, "foreign-key", ["4", "rio", ""]], + ] + + +def test_package_validate_schema_foreign_key_internal_resource_violation_non_existent(): + descriptor = deepcopy(DESCRIPTOR_FK) + descriptor["resources"][1]["data"] = [["label", "population"], [10, 10]] + package = Package(descriptor) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ + [2, None, "foreign-key", ["1", "london", "2"]], + [3, None, "foreign-key", ["2", "paris", "3"]], + [4, None, "foreign-key", 
["3", "rome", "4"]], + [5, None, "foreign-key", ["4", "rio", ""]], + ] + + +def test_package_validate_schema_multiple_foreign_key(): + descriptor = deepcopy(DESCRIPTOR_FK) + descriptor["resources"].append(MULTI_FK_RESSOURCE) + package = Package(descriptor) + report = package.validate() + assert report.valid + + +def test_package_validate_schema_multiple_foreign_key_resource_violation_non_existent(): + descriptor = deepcopy(DESCRIPTOR_FK) + # remove London + del descriptor["resources"][0]["data"][1] + descriptor["resources"].append(MULTI_FK_RESSOURCE) + package = Package(descriptor) + report = package.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type", "cells", "note"]) == [ + [ + 2, + None, + "foreign-key", + ["1", "2", "1.5"], + 'for "from, to": values "1, 2" not found in the lookup table "cities" as "id, next_id"', + ], + ] + + +def test_package_validate_schema_multiple_foreign_key_violations(): + descriptor = deepcopy(DESCRIPTOR_FK) + # Add some wrong fks + descriptor["resources"][0]["data"][3][0] = 5 + descriptor["resources"][0]["data"][4][0] = 6 + descriptor["resources"].append(MULTI_FK_RESSOURCE) + package = Package(descriptor) + report = package.validate() + assert report.flatten( + [ + "rowNumber", + "fieldNames", + "fieldCells", + "referenceName", + "referenceFieldNames", + ] + ) == [ + [3, ["next_id"], ["3"], "", ["id"]], + [4, ["next_id"], ["4"], "", ["id"]], + [4, ["id"], ["5"], "people", ["label"]], + [5, ["id"], ["6"], "people", ["label"]], + [4, ["from", "to"], ["3", "4"], "cities", ["id", "next_id"]], + ] + + +# Bugs + + +def test_package_validate_using_detector_schema_sync_issue_847(): + package = Package( + resources=[ + Resource( + data=[["f1"], ["v1"], ["v2"], ["v3"]], + schema=Schema( + fields=[ + fields.AnyField(name="f1"), + fields.AnyField(name="f2"), + ] + ), + ), + ] + ) + for resource in package.resources: + resource.detector = Detector(schema_sync=True) + report = package.validate() + assert report.valid + + +# Parallel + +# Note: to test parallel validation, do not use foreign keys to prevent an +# automatic fallback on single-core execution + + +@pytest.mark.ci +def test_package_validate_parallel_from_dict(): + with open("data/datapackage.json") as file: + package = Package(json.load(file), basepath="data") + report = package.validate(parallel=True) + assert report.valid + + +@pytest.mark.ci +def test_package_validate_parallel_from_dict_invalid(): + with open("data/invalid/datapackage_no_foreign_key.json") as file: + package = Package(json.load(file), basepath="data/invalid") + report = package.validate(parallel=True) + assert report.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == [ + [1, 3, None, "blank-row"], + [1, 3, None, "primary-key"], + [2, 4, None, "blank-row"], + ] + + +@pytest.mark.ci +def test_package_validate_with_parallel(): + package = Package("data/invalid/datapackage_no_foreign_key.json") + report = package.validate(parallel=True) + assert report.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == [ + [1, 3, None, "blank-row"], + [1, 3, None, "primary-key"], + [2, 4, None, "blank-row"], + ] diff --git a/frictionless/package/package.py b/frictionless/package/package.py index bf5e180c7f..4fdd2c555d 100644 --- a/frictionless/package/package.py +++ b/frictionless/package/package.py @@ -1,24 +1,25 @@ from __future__ import annotations +from multiprocessing import Pool from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union import attrs from typing_extensions import Self from .. 
import errors, fields, helpers, settings +from ..checklist import Checklist from ..exception import FrictionlessException from ..metadata import Metadata from ..platform import platform +from ..report import Report from ..resource import Resource from ..system import system from ..transformer import Transformer -from ..validator import Validator from .factory import Factory if TYPE_CHECKING: from .. import types from ..catalog import Dataset - from ..checklist import Checklist from ..detector import Detector from ..dialect import Control, Dialect from ..indexer import IOnProgress, IOnRow @@ -476,20 +477,61 @@ def validate( Parameters: checklist? (checklist): a Checklist object - parallel? (bool): run in parallel if possible + parallel? (bool): run in parallel if possible. Parallel execution + is not possible if foreign keys are used in a resource schema. Returns: Report: validation report """ - validator = Validator() - return validator.validate_package( - self, - checklist=checklist, - name=name, - parallel=parallel, - limit_rows=limit_rows, - limit_errors=limit_errors, + # Create state + timer = helpers.Timer() + reports: List[Report] = [] + resources = self.resources if name is None else [self.get_resource(name)] + with_foreign_keys = any( + res.schema and res.schema.foreign_keys for res in resources + ) + + # Prepare checklist + checklist = checklist or Checklist() + + # Validate metadata + try: + self.to_descriptor(validate=True) + except FrictionlessException as exception: + return Report.from_validation(time=timer.time, errors=exception.to_errors()) + + # Validate sequential + if not parallel or with_foreign_keys: + for resource in resources: + report = resource.validate( + checklist=checklist, + limit_errors=limit_errors, + limit_rows=limit_rows, + ) + reports.append(report) + + # Validate parallel + else: + with Pool() as pool: + options_pool: List[Dict[str, Any]] = [] + for resource in resources: + options: Any = {} + options["resource"] = {} + options["resource"]["descriptor"] = resource.to_descriptor() + options["resource"]["basepath"] = resource.basepath + options["validate"] = {} + options["validate"]["limit_rows"] = limit_rows + options["validate"]["limit_errors"] = limit_errors + options_pool.append(options) + report_descriptors = pool.map(_validate_parallel, options_pool) + for report_descriptor in report_descriptors: + reports.append(Report.from_descriptor(report_descriptor)) + + # Return report + return Report.from_validation_reports( + time=timer.time, + reports=reports, ) # Convert @@ -706,3 +748,11 @@ def metadata_export(self): # type: ignore # descriptor = {"$frictionless": "package/v2", **descriptor} return descriptor + + +def _validate_parallel(options: types.IDescriptor) -> types.IDescriptor: + resource_options = options["resource"] + validate_options = options["validate"] + resource = Resource.from_descriptor(**resource_options) + report = resource.validate(**validate_options) + return report.to_descriptor() diff --git a/frictionless/validator/__spec__/resource/test_general.py b/frictionless/resource/__spec__/test_validate.py similarity index 62% rename from frictionless/validator/__spec__/resource/test_general.py rename to frictionless/resource/__spec__/test_validate.py index 31002ad4b1..0f2af067ee 100644 --- a/frictionless/validator/__spec__/resource/test_general.py +++ b/frictionless/resource/__spec__/test_validate.py @@ -6,9 +6,12 @@ Check, Checklist, Detector, + Dialect, FrictionlessException, Resource, + Schema, errors, + platform, ) from 
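The `_validate_parallel` helper above only ever passes plain dict descriptors across the process boundary, since live `Resource` objects do not serialize cleanly into worker processes. A hedged sketch of the same round-trip pattern, assuming a `data/datapackage.json` whose paths resolve locally:

```python
# Sketch of the descriptor round-trip used by Package.validate(parallel=True):
# serialize each resource, validate it in a worker, deserialize the report.
from multiprocessing import Pool

from frictionless import Package, Report, Resource

def validate_one(options: dict) -> dict:
    resource = Resource.from_descriptor(
        options["descriptor"], basepath=options["basepath"]
    )
    return resource.validate().to_descriptor()

if __name__ == "__main__":
    package = Package("data/datapackage.json")
    options_pool = [
        {"descriptor": res.to_descriptor(), "basepath": res.basepath}
        for res in package.resources
    ]
    with Pool() as pool:
        descriptors = pool.map(validate_one, options_pool)
    reports = [Report.from_descriptor(d) for d in descriptors]
    print(all(report.valid for report in reports))
```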
frictionless.resources import TableResource @@ -450,3 +453,302 @@ def test_resource_validate_resource_metadata_errors_with_fields_993(): assert error.note == "descriptor is not valid" assert reasons[0].type == "resource-error" assert reasons[0].note == '"fields" should be set as "schema.fields"' + + +# Checklist + + +def test_resource_validate_bound_checklist(): + checklist = Checklist(pick_errors=["blank-label", "blank-row"]) + resource = TableResource(path="data/invalid.csv") + report = resource.validate(checklist) + assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ + [None, 3, "blank-label"], + [4, None, "blank-row"], + ] + + +# Compression + + +def test_resource_validate_compression(): + resource = TableResource(path="data/table.csv.zip") + report = resource.validate() + assert report.valid + + +def test_resource_validate_compression_explicit(): + resource = TableResource(path="data/table.csv.zip", compression="zip") + report = resource.validate() + assert report.valid + + +def test_resource_validate_compression_invalid(): + resource = TableResource(path="data/table.csv.zip", compression="bad") + report = resource.validate() + assert report.flatten(["type", "note"]) == [ + ["compression-error", 'compression "bad" is not supported'], + ] + + +# Detector + + +def test_resource_validate_detector_sync_schema(): + schema = Schema.from_descriptor( + { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + ], + } + ) + detector = Detector(schema_sync=True) + resource = TableResource( + path="data/sync-schema.csv", schema=schema, detector=detector + ) + report = resource.validate() + assert report.valid + + +def test_resource_validate_detector_sync_schema_invalid(): + source = [["LastName", "FirstName", "Address"], ["Test", "Tester", "23 Avenue"]] + schema = Schema.from_descriptor( + { + "fields": [ + {"name": "id", "type": "string"}, + {"name": "FirstName", "type": "string"}, + {"name": "LastName", "type": "string"}, + ] + } + ) + detector = Detector(schema_sync=True) + resource = TableResource(data=source, schema=schema, detector=detector) + report = resource.validate() + assert report.valid + + +def test_resource_validate_detector_headers_errors(): + source = [ + ["id", "last_name", "first_name", "language"], + [1, "Alex", "John", "English"], + [2, "Peters", "John", "Afrikaans"], + [3, "Smith", "Paul", None], + ] + schema = Schema.from_descriptor( + { + "fields": [ + {"name": "id", "type": "number"}, + { + "name": "language", + "type": "string", + "constraints": {"required": True}, + }, + {"name": "country", "type": "string"}, + ] + } + ) + detector = Detector(schema_sync=True) + resource = TableResource(data=source, schema=schema, detector=detector) + report = resource.validate() + assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ + [4, 4, "constraint-error", ["3", "Smith", "Paul", ""]], + ] + + +def test_resource_validate_detector_patch_schema(): + detector = Detector(schema_patch={"missingValues": ["-"]}) + resource = TableResource(path="data/table.csv", detector=detector) + report = resource.validate() + assert report.valid + assert resource.schema.to_descriptor() == { + "fields": [ + {"name": "id", "type": "integer"}, + {"name": "name", "type": "string"}, + ], + "missingValues": ["-"], + } + + +def test_resource_validate_detector_patch_schema_fields(): + detector = Detector( + schema_patch={"fields": {"id": {"type": "string"}}, "missingValues": ["-"]} + ) + resource = TableResource(path="data/table.csv", 
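With this refactoring, `schema_sync=True` no longer rewrites the schema up front in the detector; instead, fields are matched to labels by name when the header is processed (see the `header.py` changes further down in this diff). A small sketch of the observable behaviour, assuming in-memory data:

```python
# Sketch: under schema_sync=True the declared field order does not have to
# match the header; fields are matched to labels by name, not position.
from frictionless import Detector, Schema
from frictionless.resources import TableResource

schema = Schema.from_descriptor(
    {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "id", "type": "integer"},
        ]
    }
)
resource = TableResource(
    data=[["id", "name"], [1, "english"], [2, "french"]],
    schema=schema,
    detector=Detector(schema_sync=True),
)
report = resource.validate()
print(report.valid)  # True despite the reversed field order
```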
detector=detector) + report = resource.validate() + assert report.valid + assert resource.schema.to_descriptor() == { + "fields": [ + {"name": "id", "type": "string"}, + {"name": "name", "type": "string"}, + ], + "missingValues": ["-"], + } + + +def test_resource_validate_detector_infer_type_string(): + detector = Detector(field_type="string") + resource = TableResource(path="data/table.csv", detector=detector) + report = resource.validate() + assert report.valid + assert resource.schema.to_descriptor() == { + "fields": [ + {"name": "id", "type": "string"}, + {"name": "name", "type": "string"}, + ], + } + + +def test_resource_validate_detector_infer_type_any(): + detector = Detector(field_type="any") + resource = TableResource(path="data/table.csv", detector=detector) + report = resource.validate() + assert report.valid + assert resource.schema.to_descriptor() == { + "fields": [{"name": "id", "type": "any"}, {"name": "name", "type": "any"}], + } + + +def test_resource_validate_detector_infer_names(): + dialect = Dialect(header=False) + detector = Detector(field_names=["id", "name"]) + resource = TableResource( + path="data/without-headers.csv", dialect=dialect, detector=detector + ) + report = resource.validate() + assert report.valid + assert resource.schema.fields[0].name == "id" + assert resource.schema.fields[1].name == "name" + assert resource.stats.rows == 3 + assert resource.labels == [] + assert resource.header == ["id", "name"] + + +# Encoding + + +def test_resource_validate_encoding(): + resource = TableResource(path="data/table.csv", encoding="utf-8") + report = resource.validate() + assert report.valid + + +@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") +def test_resource_validate_encoding_invalid(): + resource = TableResource(path="data/latin1.csv", encoding="utf-8") + report = resource.validate() + assert not report.valid + assert report.flatten(["type", "note"]) == [ + [ + "encoding-error", + "'utf-8' codec can't decode byte 0xa9 in position 20: invalid start byte", + ], + ] + + +# File + + +def test_resource_validate_format_non_tabular(): + resource = Resource("data/table.bad") + report = resource.validate() + assert report.valid + + +def test_resource_validate_invalid_resource_standards_v2_strict(): + report = Resource.validate_descriptor({"path": "data/table.csv"}) + assert report.flatten(["type", "note"]) == [ + ["resource-error", "'name' is a required property"], + ] + + +# Format + + +def test_resource_validate_format(): + resource = TableResource(path="data/table.csv", format="csv") + report = resource.validate() + assert report.valid + + +# Stats + + +def test_resource_validate_stats_hash(): + hash = "sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8" + resource = TableResource(path="data/table.csv", hash=hash) + report = resource.validate() + assert report.task.valid + + +def test_resource_validate_stats_hash_invalid(): + hash = "6c2c61dd9b0e9c6876139a449ed87933" + resource = TableResource(path="data/table.csv", hash="bad") + report = resource.validate() + assert report.flatten(["type", "note"]) == [ + [ + "hash-count", + 'expected is "bad" and actual is "%s"' % hash, + ], + ] + + +def test_resource_validate_stats_bytes(): + resource = TableResource(path="data/table.csv", bytes=30) + report = resource.validate() + assert report.task.valid + + +def test_resource_validate_stats_bytes_invalid(): + resource = TableResource(path="data/table.csv", bytes=40) + report = resource.validate() + assert 
report.task.error.to_descriptor().get("rowNumber") is None + assert report.task.error.to_descriptor().get("fieldNumber") is None + assert report.flatten(["type", "note"]) == [ + ["byte-count", 'expected is "40" and actual is "30"'], + ] + + +def test_resource_validate_stats_rows(): + resource = TableResource(path="data/table.csv", rows=2) + report = resource.validate() + assert report.task.valid + + +def test_resource_validate_stats_rows_invalid(): + resource = TableResource(path="data/table.csv", rows=3) + report = resource.validate() + assert report.task.error.to_descriptor().get("rowNumber") is None + assert report.task.error.to_descriptor().get("fieldNumber") is None + assert report.flatten(["type", "note"]) == [ + ["row-count", 'expected is "3" and actual is "2"'], + ] + + +def test_resource_validate_stats_not_supported_hash_algorithm(): + resource = TableResource.from_descriptor( + { + "name": "name", + "path": "data/table.csv", + "hash": "sha1:db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e", + } + ) + report = resource.validate() + assert report.task.warnings == ["hash is ignored; supported algorithms: md5/sha256"] + + +# Scheme + + +def test_resource_validate_scheme(): + resource = TableResource(path="data/table.csv", scheme="file") + report = resource.validate() + assert report.valid + + +def test_resource_validate_scheme_invalid(): + resource = TableResource(path="bad://data/table.csv") + report = resource.validate() + assert report.flatten(["type", "note"]) == [ + ["scheme-error", 'scheme "bad" is not supported'], + ] diff --git a/frictionless/validator/__spec__/resource/test_dialect.py b/frictionless/resource/__spec__/test_validate_dialect.py similarity index 99% rename from frictionless/validator/__spec__/resource/test_dialect.py rename to frictionless/resource/__spec__/test_validate_dialect.py index b587ce8841..984999b0c9 100644 --- a/frictionless/validator/__spec__/resource/test_dialect.py +++ b/frictionless/resource/__spec__/test_validate_dialect.py @@ -1,7 +1,7 @@ from frictionless import Dialect, Resource, formats from frictionless.resources import TableResource -# General +# Dialect def test_resource_validate_dialect_delimiter(): diff --git a/frictionless/validator/__spec__/resource/test_schema.py b/frictionless/resource/__spec__/test_validate_schema.py similarity index 100% rename from frictionless/validator/__spec__/resource/test_schema.py rename to frictionless/resource/__spec__/test_validate_schema.py diff --git a/frictionless/resource/resource.py b/frictionless/resource/resource.py index 3d93d85c95..dfff73ffab 100644 --- a/frictionless/resource/resource.py +++ b/frictionless/resource/resource.py @@ -8,22 +8,22 @@ from typing_extensions import Self from .. import errors, fields, helpers, settings +from ..checklist import Checklist from ..detector import Detector from ..dialect import Control, Dialect from ..exception import FrictionlessException from ..metadata import Metadata from ..platform import platform +from ..report import Report from ..schema import Schema from ..system import system -from ..validator import Validator from .factory import Factory from .stats import ResourceStats if TYPE_CHECKING: from .. 
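`Resource.validate` (below) warns and ignores hashes whose algorithm is not md5/sha256, parsing the `algorithm:digest` convention via the internal `helpers.parse_resource_hash_v1`. A hypothetical stand-in to illustrate the convention; the bare-digest-defaults-to-md5 rule is an assumption here, not a confirmed detail of the helper:

```python
from typing import Tuple

# Hypothetical stand-in for helpers.parse_resource_hash_v1 (internal API).
def parse_hash(hash: str) -> Tuple[str, str]:
    if ":" not in hash:
        return ("md5", hash)  # assumption: v1-style bare digests mean md5
    algorithm, digest = hash.split(":", maxsplit=1)
    return (algorithm, digest)

assert parse_hash("sha1:db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e")[0] == "sha1"
# sha1 is not in {md5, sha256}, so validation records a warning and ignores it
```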
import types - from ..checklist import Checklist + from ..error import Error from ..package import Package - from ..report import Report from ..system import Loader @@ -619,8 +619,112 @@ def validate( Report: validation report """ - validator = Validator() - return validator.validate_resource(self, checklist=checklist) + # Create state + partial = False + timer = helpers.Timer() + labels: List[str] = [] + errors: List[Error] = [] + warnings: List[str] = [] + + # Prepare checklist + checklist = checklist or Checklist() + checks = checklist.connect(self) + + # Validate metadata + try: + self.to_descriptor(validate=True) + except FrictionlessException as exception: + return Report.from_validation_task( + self, time=timer.time, errors=exception.to_errors() + ) + + # TODO: remove in next version + # Ignore not-supported hashings + if self.hash: + algorithm, _ = helpers.parse_resource_hash_v1(self.hash) + if algorithm not in ["md5", "sha256"]: + warning = "hash is ignored; supported algorithms: md5/sha256" + warnings.append(warning) + + # Prepare resource + if self.closed: + try: + self.open() + except FrictionlessException as exception: + self.close() + return Report.from_validation_task( + self, time=timer.time, errors=exception.to_errors() + ) + + # Validate data + with self: + # Validate start + for index, check in enumerate(checks): + for error in check.validate_start(): + if error.type == "check-error": + del checks[index] + if checklist.match(error): + errors.append(error) + + # Validate file + if not isinstance(self, platform.frictionless_resources.TableResource): + if self.hash is not None or self.bytes is not None: + helpers.pass_through(self.byte_stream) + + # Validate table + else: + row_count = 0 + labels = self.labels + while True: + row_count += 1 + + # Emit row + try: + row = next(self.row_stream) # type: ignore + except FrictionlessException as exception: + errors.append(exception.error) + continue + except StopIteration: + break + + # Validate row + for check in checks: + for error in check.validate_row(row): + if checklist.match(error): + errors.append(error) + + # Callback row + if on_row: + on_row(row) + + # Limit rows + if limit_rows: + if row_count >= limit_rows: + warning = f"reached row limit: {limit_rows}" + warnings.append(warning) + partial = True + break + + # Limit errors + if limit_errors: + if len(errors) >= limit_errors: + errors = errors[:limit_errors] + warning = f"reached error limit: {limit_errors}" + warnings.append(warning) + partial = True + break + + # Validate end + if not partial: + for check in checks: + for error in check.validate_end(): + if checklist.match(error): + errors.append(error) + + # Return report + return Report.from_validation_task( + self, time=timer.time, labels=labels, errors=errors, warnings=warnings + ) # Export diff --git a/frictionless/resources/table.py b/frictionless/resources/table.py index fdf9ffce4b..0bc1a2f254 100644 --- a/frictionless/resources/table.py +++ b/frictionless/resources/table.py @@ -5,9 +5,7 @@ import warnings from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from frictionless.schema.field import Field - -from .. import errors, helpers, settings +from .. import errors, helpers from ..analyzer import Analyzer from ..dialect import Dialect from ..exception import FrictionlessException @@ -17,11 +15,9 @@ from ..system import system from ..table import Header, Lookup, Row, Table from ..transformer import Transformer -from ..validator import Validator if TYPE_CHECKING: from .. 
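`Resource.validate` above now drives the row loop directly, so the `on_row` callback and the row/error limits are applied inline rather than by a separate Validator. A usage sketch, assuming the `data/invalid.csv` fixture used throughout these tests and a signature mirroring the removed `TableResource.validate` override:

```python
from frictionless import Checklist
from frictionless.resources import TableResource

rows_seen = []
resource = TableResource(path="data/invalid.csv")
report = resource.validate(
    Checklist(pick_errors=["blank-row"]),
    on_row=rows_seen.append,  # called once per streamed row
    limit_rows=3,  # stops early; report carries a "reached row limit: 3" warning
)
print(report.valid, len(rows_seen))
```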
import types - from ..checklist import Checklist from ..indexer import IOnProgress, IOnRow from ..pipeline import Pipeline from ..system import Loader, Parser @@ -219,6 +215,7 @@ def __open_header(self): fields=self.schema.fields, row_numbers=self.dialect.header_rows, ignore_case=not self.dialect.header_case, + schema_sync=self.detector.schema_sync, ) # Handle errors @@ -265,25 +262,6 @@ def __open_lookup(self): self.__lookup[source_name][source_key].add(cells) def __open_row_stream(self): - # TODO: we need to rework this field_info / row code - # During row streaming we create a field info structure - # This structure is optimized and detached version of schema.fields - # We create all data structures in-advance to share them between rows - - # Create field info - field_number = 0 - field_info: Dict[str, Any] = {"names": [], "objects": [], "mapping": {}} - for field in self.schema.fields: - field_number += 1 - field_info["names"].append(field.name) - field_info["objects"].append(field.to_copy()) - field_info["mapping"][field.name] = ( - field, - field_number, - field.create_cell_reader(), - field.create_cell_writer(), - ) - # Create state memory_unique: Dict[str, Any] = {} memory_primary: Dict[Tuple[Any], Any] = {} @@ -313,9 +291,10 @@ def row_stream(): for row_number, cells in enumerated_content_stream: self.stats.rows += 1 + assert self.__header row = Row( cells, - field_info=field_info, + expected_fields=self.__header.get_expected_fields(), row_number=row_number, ) @@ -395,24 +374,8 @@ def row_stream(): # Yield row yield row - if self.detector.schema_sync: - # Missing required labels are not included in the - # field_info parameter used for row creation - for field in self.schema.fields: - self.remove_missing_required_label_from_field_info(field, field_info) - - # Create row stream self.__row_stream = row_stream() - def remove_missing_required_label_from_field_info( - self, field: Field, field_info: Dict[str, Any] - ): - is_case_sensitive = self.dialect.header_case - if self.label_is_missing( - field.name, field_info["names"], self.labels, is_case_sensitive - ): - self.remove_field_from_field_info(field.name, field_info) - @staticmethod def label_is_missing( field_name: str, @@ -432,13 +395,6 @@ def label_is_missing( return field_name not in table_labels and field_name in expected_field_names - @staticmethod - def remove_field_from_field_info(field_name: str, field_info: Dict[str, Any]): - field_index = field_info["names"].index(field_name) - del field_info["names"][field_index] - del field_info["objects"][field_index] - del field_info["mapping"][field_name] - def primary_key_cells(self, row: Row, case_sensitive: bool) -> Tuple[Any, ...]: """Create a tuple containg all cells from a given row associated to primary keys""" @@ -626,27 +582,6 @@ def transform(self, pipeline: Pipeline): transformer = Transformer() return transformer.transform_table_resource(self, pipeline) - # Validate - - def validate( - self, - checklist: Optional[Checklist] = None, - *, - name: Optional[str] = None, - on_row: Optional[types.ICallbackFunction] = None, - parallel: bool = False, - limit_rows: Optional[int] = None, - limit_errors: int = settings.DEFAULT_LIMIT_ERRORS, - ): - validator = Validator() - return validator.validate_resource( - self, - checklist=checklist, - on_row=on_row, - limit_rows=limit_rows, - limit_errors=limit_errors, - ) - # Export def to_view(self, type: str = "look", **options: Any): diff --git a/frictionless/table/header.py b/frictionless/table/header.py index ed485a1ad5..43c64d3d26 
100644
--- a/frictionless/table/header.py
+++ b/frictionless/table/header.py
@@ -1,12 +1,13 @@
 from __future__ import annotations

 from functools import cached_property
-from typing import TYPE_CHECKING, List
+from typing import List, Optional

 from .. import errors, helpers
+from ..exception import FrictionlessException
+from ..schema import Field

-if TYPE_CHECKING:
-    from ..schema import Field
+Label = str


 class Header(List[str]):  # type: ignore
@@ -29,9 +30,12 @@ def __init__(
         fields: List[Field],
         row_numbers: List[int],
         ignore_case: bool = False,
+        schema_sync: bool,
     ):
         super().__init__(field.name for field in fields)
-        self.__fields = [field.to_copy() for field in fields]
+        self.__fields = fields.copy()
+        self.__expected_fields: Optional[List[Field]] = None
+        self.__schema_sync = schema_sync
         self.__field_names = self.copy()
         self.__row_numbers = row_numbers
         self.__ignore_case = ignore_case
@@ -125,47 +129,41 @@ def __process(self):
         if self.missing:
             return

-        # Prepare context
-        labels = self.__labels
-        fields = self.__fields
+        labels = self.labels

         # Extra label
-        if len(fields) < len(labels):
-            start = len(fields) + 1
-            iterator = labels[len(fields) :]
-            for field_number, label in enumerate(iterator, start=start):
+        start = len(self.fields) + 1
+        for field_number, label in enumerate(self._get_extra_labels(), start=start):
+            self.__errors.append(
+                errors.ExtraLabelError(
+                    note="",
+                    labels=list(map(str, labels)),
+                    row_numbers=self.__row_numbers,
+                    label=label,
+                    field_name="",
+                    field_number=field_number,
+                )
+            )
+
+        # Missing label
+        start = len(labels) + 1
+        for field_number, field in enumerate(self._get_missing_fields(), start=start):
+            if field is not None:  # type: ignore
                 self.__errors.append(
-                    errors.ExtraLabelError(
+                    errors.MissingLabelError(
                         note="",
                         labels=list(map(str, labels)),
                         row_numbers=self.__row_numbers,
                         label="",
-                        field_name="",
+                        field_name=field.name,
                         field_number=field_number,
                     )
                 )

-        # Missing label
-        if len(fields) > len(labels):
-            start = len(labels) + 1
-            iterator = fields[len(labels) :]
-            for field_number, field in enumerate(iterator, start=start):
-                if field is not None:  # type: ignore
-                    self.__errors.append(
-                        errors.MissingLabelError(
-                            note="",
-                            labels=list(map(str, labels)),
-                            row_numbers=self.__row_numbers,
-                            label="",
-                            field_name=field.name,
-                            field_number=field_number,
-                        )
-                    )
-
         # Iterate items
-        field_number = 0
-        for field, label in zip(fields, labels):
-            field_number += 1
+        for index, (field, label) in enumerate(zip(self.get_expected_fields(), labels)):
+            # field_number is 1-indexed
+            field_number = index + 1

             # Blank label
             if not label:
@@ -228,3 +226,100 @@ def __process(self):
                 row_numbers=self.__row_numbers,
             )
         ]
+
+    def _get_extra_labels(self) -> List[str]:
+        """Returns unexpected extra labels.
+
+        If `schema_sync=False`, the labels are expected to be in the same
+        order, and of the same number, as the schema fields. If there are
+        more labels than fields, the trailing labels are returned as
+        extra labels.
+
+        If `schema_sync=True`, extra labels are ignored and this method
+        returns an empty list.
+        """
+        if not self.__schema_sync:
+            if len(self.fields) < len(self.labels):
+                return self.labels[len(self.fields) :]
+        return []
+
+    def _get_missing_fields(self) -> List[Field]:
+        """Returns unexpected missing fields.
+
+        If `schema_sync=False`, the labels are expected to be in the same
+        order, and of the same number, as the schema fields. If there are
+        more fields than labels, the trailing fields are returned as
+        missing fields.
+
+        If `schema_sync=True`, missing fields are ignored, except if they
+        are marked as `required`.
+        """
+
+        fields = self.fields
+        labels = self.labels
+        if not self.__schema_sync:
+            if len(fields) > len(labels):
+                return fields[len(labels) :]
+        else:
+
+            def required_and_missing(field: Field) -> bool:
+                required: bool = field.required or (
+                    field.schema is not None and field.name in field.schema.primary_key
+                )
+                missing = self._normalize(field.name) not in [
+                    self._normalize(label) for label in labels
+                ]
+                return required and missing
+
+            return [field for field in fields if required_and_missing(field)]
+
+        return []
+
+    def get_expected_fields(self) -> List[Field]:
+        """Returns a list of fields, in the order they are expected to be
+        found in the data.
+
+        The label at the same position, and its associated data, are
+        expected to comply with the field's expectations.
+
+        If `schema_sync=False`, the schema fields are precisely the expected
+        fields, so they are returned unchanged.
+
+        If `schema_sync=True`, fields are reordered so that the field names
+        match the labels. If no matching field exists for a label, an extra
+        field with `type: any` is created on the fly.
+
+        The result is cached after the first call.
+        """
+        if not self.__expected_fields:
+            if not self.__schema_sync:
+                self.__expected_fields = self.fields
+            else:
+                expected_fields: List[Field] = []
+
+                if len(self.labels) != len(set(self.labels)):
+                    note = '"schema_sync" requires unique labels in the header'
+                    raise FrictionlessException(note)
+
+                for label in self.labels:
+                    field = self._find_field_by_name(label)
+
+                    if not field:
+                        # Default value
+                        field = Field.from_descriptor({"name": label, "type": "any"})
+
+                    expected_fields.append(field)
+                self.__expected_fields = expected_fields
+
+        return self.__expected_fields
+
+    def _find_field_by_name(self, name: str) -> Optional[Field]:
+        try:
+            return next(
+                f for f in self.fields if self._normalize(f.name) == self._normalize(name)
+            )
+        except StopIteration:
+            return None
+
+    def _normalize(self, s: str) -> str:
+        return s.lower() if self.__ignore_case else s
diff --git a/frictionless/table/row.py b/frictionless/table/row.py
index b2947ba677..80de14bbc5 100644
--- a/frictionless/table/row.py
+++ b/frictionless/table/row.py
@@ -1,8 +1,9 @@
 from __future__ import annotations

+from collections import OrderedDict
 from functools import cached_property
 from itertools import zip_longest
-from typing import Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional

 from .. import errors, helpers
 from ..platform import platform
@@ -11,6 +12,9 @@
 # Currently dict.update/setdefault/pop/popitem/clear is not disabled (can be confusing)
 # We can consider adding row.header property to provide more comprehensive API

+if TYPE_CHECKING:
+    from ..schema.field import Field
+

 # TODO: add types
 class Row(Dict[str, Any]):
@@ -18,6 +22,9 @@ class Row(Dict[str, Any]):

     > Constructor of this object is not Public API

+    It works like a lazy dictionary: dictionary values are only computed (see
+    the `__process` method) if needed.
+
     This object is returned by `extract`, `resource.read_rows`, and other functions.
```python @@ -36,11 +43,13 @@ def __init__( self, cells: List[Any], *, - field_info: Dict[str, Any], + expected_fields: List[Field], row_number: int, ): self.__cells = cells - self.__field_info = field_info + self.__expected_fields: OrderedDict[str, Field] = OrderedDict( + (f.name, f) for f in expected_fields + ) self.__row_number = row_number self.__processed: bool = False self.__blank_cells: Dict[str, Any] = {} @@ -52,18 +61,24 @@ def __eq__(self, other: object): return super().__eq__(other) def __str__(self): - self.__process() - return super().__str__() + s = "" + if not self.__processed: + s = "Unprocessed: " + return s + super().__str__() def __repr__(self): - self.__process() - return super().__repr__() + s = "" + if not self.__processed: + s = "Unprocessed: " + return s + super().__repr__() def __setitem__(self, key: str, value: Any): try: - _, field_number, _, _ = self.__field_info["mapping"][key] - except KeyError: + keys = [k for k in self.__expected_fields.keys()] + field_number = keys.index(key) + 1 + except ValueError: raise KeyError(f"Row does not have a field {key}") + if len(self.__cells) < field_number: self.__cells.extend([None] * (field_number - len(self.__cells))) self.__cells[field_number - 1] = value @@ -73,30 +88,33 @@ def __missing__(self, key: str): return self.__process(key) def __iter__(self): - return iter(self.__field_info["names"]) + return iter(self.__expected_fields) def __len__(self): - return len(self.__field_info["names"]) + return len(self.__expected_fields) def __contains__(self, key: object): - return key in self.__field_info["mapping"] + return key in self.__expected_fields def __reversed__(self): - return reversed(self.__field_info["names"]) + return reversed(self.__expected_fields.keys()) def keys(self): - return iter(self.__field_info["names"]) + return self.__expected_fields.keys() def values(self): # type: ignore - for name in self.__field_info["names"]: + self.__process() + for name in self.__expected_fields: yield self[name] def items(self): # type: ignore - for name in self.__field_info["names"]: + self.__process() + for name in self.__expected_fields: yield (name, self[name]) def get(self, key: str, default: Optional[Any] = None): - if key not in self.__field_info["names"]: + self.__process() + if key not in self.__expected_fields: return default return self[key] @@ -104,8 +122,9 @@ def get(self, key: str, default: Optional[Any] = None): def cells(self): """ Returns: - Field[]: table schema fields + Any[]: Table cell values """ + self.__process() return self.__cells @cached_property @@ -114,7 +133,7 @@ def fields(self): Returns: Field[]: table schema fields """ - return self.__field_info["objects"] + return [f.to_copy() for f in self.__expected_fields.values()] @cached_property def field_names(self) -> List[str]: @@ -122,7 +141,7 @@ def field_names(self) -> List[str]: Returns: str[]: field names """ - return self.__field_info["names"] + return [k for k in self.__expected_fields] @cached_property def field_numbers(self): @@ -130,7 +149,7 @@ def field_numbers(self): Returns: str[]: field numbers """ - return list(range(1, len(self.__field_info["names"]) + 1)) + return list(range(1, len(self.__expected_fields) + 1)) @cached_property def row_number(self) -> int: @@ -201,14 +220,17 @@ def to_list(self, *, json: bool = False, types: Optional[List[str]] = None): # Prepare self.__process() - result = [self[name] for name in self.__field_info["names"]] + result = [self[name] for name in self.field_names] if types is None and json: types = 
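Since `Row` is now explicitly lazy, nothing is type-cast until a value is actually read, and `__repr__` makes that state visible. A small sketch of the behaviour, assuming inline data and an inferred schema:

```python
# Sketch: Row is a lazily processed dict; cells are only typed on first read.
from frictionless.resources import TableResource

resource = TableResource(data=[["id", "name"], ["1", "english"]])
with resource:
    row = next(resource.row_stream)
    print(repr(row))      # "Unprocessed: {}" until a value is read
    print(row["id"])      # reading a cell triggers __process for that field
    print(row.to_dict())  # exporting processes the remaining cells
```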
platform.frictionless_formats.JsonParser.supported_types # Convert if types is not None: - for index, field_mapping in enumerate(self.__field_info["mapping"].values()): - field, _, _, cell_writer = field_mapping + field_names = self.field_names + for index, field_name in enumerate(field_names): + field = self.__expected_fields[field_name] + cell_writer = field.create_cell_writer() + # Here we can optimize performance if we use a types mapping if field.type in types: continue @@ -223,7 +245,11 @@ def to_list(self, *, json: bool = False, types: Optional[List[str]] = None): return result def to_dict( - self, *, csv: bool = False, json: bool = False, types: Optional[List[str]] = None + self, + *, + csv: bool = False, + json: bool = False, + types: Optional[List[str]] = None, ) -> Dict[str, Any]: """ Parameters: @@ -235,7 +261,7 @@ def to_dict( # Prepare self.__process() - result = {name: self[name] for name in self.__field_info["names"]} + result = {name: self[name] for name in self.__expected_fields} if types is None and json: types = platform.frictionless_formats.JsonParser.supported_types if types is None and csv: @@ -243,8 +269,11 @@ def to_dict( # Convert if types is not None: - for field_mapping in self.__field_info["mapping"].values(): - field, _, _, cell_writer = field_mapping + field_names = self.field_names + for field_name in field_names: + field = self.__expected_fields[field_name] + cell_writer = field.create_cell_writer() + # Here we can optimize performance if we use a types mapping if field.type not in types: cell = result[field.name] @@ -268,46 +297,48 @@ def __process(self, key: Optional[str] = None): # Prepare context cells = self.__cells to_str = lambda v: str(v) if v is not None else "" # type: ignore - fields = self.__field_info["objects"] - field_mapping = self.__field_info["mapping"] - iterator = zip_longest(field_mapping.values(), cells) + fields = [f.to_copy() for f in self.__expected_fields.values()] + + iterator = zip_longest(range(len(fields)), self.__expected_fields.values(), cells) is_empty = not bool(super().__len__()) + if key: try: - field, field_number, cell_reader, cell_writer = self.__field_info[ - "mapping" - ][key] - except KeyError: + field = self.__expected_fields.get(key) + field_index = self.field_names.index(key) + except ValueError: raise KeyError(f"Row does not have a field {key}") - cell = cells[field_number - 1] if len(cells) >= field_number else None - iterator = zip([(field, field_number, cell_reader, cell_writer)], [cell]) + + cell = cells[field_index] if len(cells) >= field_index + 1 else None + iterator = [(field_index, field, cell)] # Iterate cells - for field_mapping, source in iterator: + for index, field, cell in iterator: # Prepare context - if field_mapping is None: + if field is None: break - field, field_number, cell_reader, _ = field_mapping + cell_reader = field.create_cell_reader() + if not is_empty and super().__contains__(field.name): continue # Read cell - target, notes = cell_reader(source) + target, notes = cell_reader(cell) type_note = notes.pop("type", None) if notes else None if target is None and not type_note: - self.__blank_cells[field.name] = source + self.__blank_cells[field.name] = cell # Type error if type_note: - self.__error_cells[field.name] = source + self.__error_cells[field.name] = cell self.__errors.append( errors.TypeError( note=type_note, cells=list(map(to_str, cells)), # type: ignore row_number=self.__row_number, - cell=str(source), + cell=str(cell), field_name=field.name, - field_number=field_number, + 
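The `types` plumbing above lets exports down-convert cells through each field's cell writer. A sketch of the difference, assuming the detector infers a `date` field from the ISO string:

```python
# Sketch: type-aware export via to_dict(json=True), as wired through
# create_cell_writer above; the date inference is an assumption here.
from frictionless.resources import TableResource

resource = TableResource(data=[["when"], ["2024-01-01"]])
with resource:
    row = next(resource.row_stream)
    print(row.to_dict())           # {'when': datetime.date(2024, 1, 1)}
    print(row.to_dict(json=True))  # {'when': '2024-01-01'} - written back out
```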
field_number=index + 1, ) ) @@ -319,9 +350,9 @@ def __process(self, key: Optional[str] = None): note=note, cells=list(map(to_str, cells)), # type: ignore row_number=self.__row_number, - cell=str(source), + cell=str(cell), field_name=field.name, - field_number=field_number, + field_number=index + 1, ) ) diff --git a/frictionless/validator/__spec__/__init__.py b/frictionless/validator/__spec__/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/frictionless/validator/__spec__/package/__init__.py b/frictionless/validator/__spec__/package/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/frictionless/validator/__spec__/package/test_checklist.py b/frictionless/validator/__spec__/package/test_checklist.py deleted file mode 100644 index db25ec888d..0000000000 --- a/frictionless/validator/__spec__/package/test_checklist.py +++ /dev/null @@ -1,23 +0,0 @@ -import json - -from frictionless import Checklist, Package - - -def test_package_validate_with_skip_errors(): - ## Test runs on data with two blank-row errors, one primary-key error, see - # first test case - test_cases = [ - {"ignore": [], "expect_errors": ["blank-row", "primary-key", "blank-row"]}, - {"ignore": ["primary-key"], "expect_errors": ["blank-row", "blank-row"]}, - {"ignore": ["blank-row"], "expect_errors": ["primary-key"]}, - {"ignore": ["blank-row", "primary-key"], "expect_errors": []}, - ] - - for tc in test_cases: - with open("data/invalid/datapackage.json") as file: - package = Package(json.load(file), basepath="data/invalid") - checklist = Checklist(skip_errors=tc["ignore"]) - - report = package.validate(checklist) - - assert report.flatten(["type"]) == [[t] for t in tc["expect_errors"]] diff --git a/frictionless/validator/__spec__/package/test_parallel.py b/frictionless/validator/__spec__/package/test_parallel.py deleted file mode 100644 index a54673a69d..0000000000 --- a/frictionless/validator/__spec__/package/test_parallel.py +++ /dev/null @@ -1,38 +0,0 @@ -import json - -import pytest - -from frictionless import Package - -# General - - -@pytest.mark.ci -def test_package_validate_parallel_from_dict(): - with open("data/package/datapackage.json") as file: - package = Package(json.load(file), basepath="data/package") - report = package.validate(parallel=True) - assert report.valid - - -@pytest.mark.ci -def test_package_validate_parallel_from_dict_invalid(): - with open("data/invalid/datapackage.json") as file: - package = Package(json.load(file), basepath="data/invalid") - report = package.validate(parallel=True) - assert report.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == [ - [1, 3, None, "blank-row"], - [1, 3, None, "primary-key"], - [2, 4, None, "blank-row"], - ] - - -@pytest.mark.ci -def test_package_validate_with_parallel(): - package = Package("data/invalid/datapackage.json") - report = package.validate(parallel=True) - assert report.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == [ - [1, 3, None, "blank-row"], - [1, 3, None, "primary-key"], - [2, 4, None, "blank-row"], - ] diff --git a/frictionless/validator/__spec__/package/test_schema.py b/frictionless/validator/__spec__/package/test_schema.py deleted file mode 100644 index 05a5f5fb63..0000000000 --- a/frictionless/validator/__spec__/package/test_schema.py +++ /dev/null @@ -1,179 +0,0 @@ -from copy import deepcopy - -from frictionless import Detector, Package, Resource, Schema, fields - -# General - - -DESCRIPTOR_FK = { - "resources": [ - { - "name": "cities", - "data": [ - ["id", 
"name", "next_id"], - [1, "london", 2], - [2, "paris", 3], - [3, "rome", 4], - [4, "rio", None], - ], - "schema": { - "fields": [ - {"name": "id", "type": "integer"}, - {"name": "name", "type": "string"}, - {"name": "next_id", "type": "integer"}, - ], - "foreignKeys": [ - {"fields": "next_id", "reference": {"resource": "", "fields": "id"}}, - { - "fields": "id", - "reference": {"resource": "people", "fields": "label"}, - }, - ], - }, - }, - { - "name": "people", - "data": [["label", "population"], [1, 8], [2, 2], [3, 3], [4, 6]], - }, - ], -} - -MULTI_FK_RESSOURCE = { - "name": "travel_time", - "data": [["from", "to", "hours"], [1, 2, 1.5], [2, 3, 8], [3, 4, 18]], - "schema": { - "fields": [ - {"name": "from", "type": "integer"}, - {"name": "to", "type": "integer"}, - {"name": "hours", "type": "number"}, - ], - "foreignKeys": [ - { - "fields": ["from", "to"], - "reference": {"resource": "cities", "fields": ["id", "next_id"]}, - } - ], - }, -} - - -def test_package_validate_schema_foreign_key_error(): - descriptor = deepcopy(DESCRIPTOR_FK) - package = Package(descriptor) - report = package.validate() - assert report.valid - - -def test_package_validate_schema_foreign_key_not_defined(): - descriptor = deepcopy(DESCRIPTOR_FK) - del descriptor["resources"][0]["schema"]["foreignKeys"] - package = Package(descriptor) - report = package.validate() - assert report.valid - - -def test_package_validate_schema_foreign_key_self_referenced_resource_violation(): - descriptor = deepcopy(DESCRIPTOR_FK) - del descriptor["resources"][0]["data"][4] - package = Package(descriptor) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ - [4, None, "foreign-key", ["3", "rome", "4"]], - ] - - -def test_package_validate_schema_foreign_key_internal_resource_violation(): - descriptor = deepcopy(DESCRIPTOR_FK) - del descriptor["resources"][1]["data"][4] - package = Package(descriptor) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ - [5, None, "foreign-key", ["4", "rio", ""]], - ] - - -def test_package_validate_schema_foreign_key_internal_resource_violation_non_existent(): - descriptor = deepcopy(DESCRIPTOR_FK) - descriptor["resources"][1]["data"] = [["label", "population"], [10, 10]] - package = Package(descriptor) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [ - [2, None, "foreign-key", ["1", "london", "2"]], - [3, None, "foreign-key", ["2", "paris", "3"]], - [4, None, "foreign-key", ["3", "rome", "4"]], - [5, None, "foreign-key", ["4", "rio", ""]], - ] - - -def test_package_validate_schema_multiple_foreign_key(): - descriptor = deepcopy(DESCRIPTOR_FK) - descriptor["resources"].append(MULTI_FK_RESSOURCE) - package = Package(descriptor) - report = package.validate() - assert report.valid - - -def test_package_validate_schema_multiple_foreign_key_resource_violation_non_existent(): - descriptor = deepcopy(DESCRIPTOR_FK) - # remove London - del descriptor["resources"][0]["data"][1] - descriptor["resources"].append(MULTI_FK_RESSOURCE) - package = Package(descriptor) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type", "cells", "note"]) == [ - [ - 2, - None, - "foreign-key", - ["1", "2", "1.5"], - 'for "from, to": values "1, 2" not found in the lookup table "cities" as "id, next_id"', - ], - ] - - -def test_package_validate_schema_multiple_foreign_key_violations(): - descriptor = deepcopy(DESCRIPTOR_FK) - # Add some 
wrong fks - descriptor["resources"][0]["data"][3][0] = 5 - descriptor["resources"][0]["data"][4][0] = 6 - descriptor["resources"].append(MULTI_FK_RESSOURCE) - package = Package(descriptor) - report = package.validate() - assert report.flatten( - [ - "rowNumber", - "fieldNames", - "fieldCells", - "referenceName", - "referenceFieldNames", - ] - ) == [ - [3, ["next_id"], ["3"], "", ["id"]], - [4, ["next_id"], ["4"], "", ["id"]], - [4, ["id"], ["5"], "people", ["label"]], - [5, ["id"], ["6"], "people", ["label"]], - [4, ["from", "to"], ["3", "4"], "cities", ["id", "next_id"]], - ] - - -# Bugs - - -def test_package_validate_using_detector_schema_sync_issue_847(): - package = Package( - resources=[ - Resource( - data=[["f1"], ["v1"], ["v2"], ["v3"]], - schema=Schema( - fields=[ - fields.AnyField(name="f1"), - fields.AnyField(name="f2"), - ] - ), - ), - ] - ) - for resource in package.resources: - resource.detector = Detector(schema_sync=True) - report = package.validate() - assert report.valid diff --git a/frictionless/validator/__spec__/package/test_stats.py b/frictionless/validator/__spec__/package/test_stats.py deleted file mode 100644 index 3c921ea3cd..0000000000 --- a/frictionless/validator/__spec__/package/test_stats.py +++ /dev/null @@ -1,79 +0,0 @@ -from copy import deepcopy - -import pytest - -from frictionless import Package, platform - -# General - - -DESCRIPTOR_SH = { - "resources": [ - { - "name": "resource1", - "path": "data/table.csv", - "hash": "sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8", - "bytes": 30, - } - ] -} - - -@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") -def test_package_validate_stats(): - source = deepcopy(DESCRIPTOR_SH) - package = Package(source) - report = package.validate() - assert report.valid - - -def test_package_validate_stats_invalid(): - source = deepcopy(DESCRIPTOR_SH) - source["resources"][0]["hash"] += "a" - source["resources"][0]["bytes"] += 1 - package = Package(source) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ - [None, None, "hash-count"], - [None, None, "byte-count"], - ] - - -@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") -def test_package_validate_stats_size(): - source = deepcopy(DESCRIPTOR_SH) - source["resources"][0].pop("hash") - package = Package(source) - report = package.validate() - assert report.valid - - -def test_package_validate_stats_size_invalid(): - source = deepcopy(DESCRIPTOR_SH) - source["resources"][0]["bytes"] += 1 - source["resources"][0].pop("hash") - package = Package(source) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ - [None, None, "byte-count"], - ] - - -@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows") -def test_package_validate_stats_hash(): - source = deepcopy(DESCRIPTOR_SH) - source["resources"][0].pop("bytes") - package = Package(source) - report = package.validate() - assert report.valid - - -def test_package_validate_check_file_package_stats_hash_invalid(): - source = deepcopy(DESCRIPTOR_SH) - source["resources"][0].pop("bytes") - source["resources"][0]["hash"] += "a" - package = Package(source) - report = package.validate() - assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [ - [None, None, "hash-count"], - ] diff --git a/frictionless/validator/__spec__/resource/__init__.py b/frictionless/validator/__spec__/resource/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff 
diff --git a/frictionless/validator/__spec__/resource/test_checklist.py b/frictionless/validator/__spec__/resource/test_checklist.py
deleted file mode 100644
index ed30463993..0000000000
--- a/frictionless/validator/__spec__/resource/test_checklist.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from frictionless import Checklist
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_bound_checklist():
-    checklist = Checklist(pick_errors=["blank-label", "blank-row"])
-    resource = TableResource(path="data/invalid.csv")
-    report = resource.validate(checklist)
-    assert report.flatten(["rowNumber", "fieldNumber", "type"]) == [
-        [None, 3, "blank-label"],
-        [4, None, "blank-row"],
-    ]
diff --git a/frictionless/validator/__spec__/resource/test_compression.py b/frictionless/validator/__spec__/resource/test_compression.py
deleted file mode 100644
index 8fa7bacfa4..0000000000
--- a/frictionless/validator/__spec__/resource/test_compression.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_compression():
-    resource = TableResource(path="data/table.csv.zip")
-    report = resource.validate()
-    assert report.valid
-
-
-def test_resource_validate_compression_explicit():
-    resource = TableResource(path="data/table.csv.zip", compression="zip")
-    report = resource.validate()
-    assert report.valid
-
-
-def test_resource_validate_compression_invalid():
-    resource = TableResource(path="data/table.csv.zip", compression="bad")
-    report = resource.validate()
-    assert report.flatten(["type", "note"]) == [
-        ["compression-error", 'compression "bad" is not supported'],
-    ]
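The bound-checklist test deleted above is the pattern downstream users rely on: pick_errors keeps only the listed error types, while skip_errors (its inverse) drops them. A minimal sketch reusing the fixture path from that test:

from frictionless import Checklist
from frictionless.resources import TableResource

# Only blank-label and blank-row errors are reported; all other error types are filtered
checklist = Checklist(pick_errors=["blank-label", "blank-row"])
resource = TableResource(path="data/invalid.csv")
report = resource.validate(checklist)
print(report.flatten(["rowNumber", "fieldNumber", "type"]))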
diff --git a/frictionless/validator/__spec__/resource/test_detector.py b/frictionless/validator/__spec__/resource/test_detector.py
deleted file mode 100644
index c136cd9e62..0000000000
--- a/frictionless/validator/__spec__/resource/test_detector.py
+++ /dev/null
@@ -1,130 +0,0 @@
-from frictionless import Detector, Dialect, Schema
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_detector_sync_schema():
-    schema = Schema.from_descriptor(
-        {
-            "fields": [
-                {"name": "id", "type": "integer"},
-                {"name": "name", "type": "string"},
-            ],
-        }
-    )
-    detector = Detector(schema_sync=True)
-    resource = TableResource(
-        path="data/sync-schema.csv", schema=schema, detector=detector
-    )
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.to_descriptor() == {
-        "fields": [
-            {"name": "name", "type": "string"},
-            {"name": "id", "type": "integer"},
-        ],
-    }
-
-
-def test_resource_validate_detector_sync_schema_invalid():
-    source = [["LastName", "FirstName", "Address"], ["Test", "Tester", "23 Avenue"]]
-    schema = Schema.from_descriptor(
-        {
-            "fields": [
-                {"name": "id", "type": "string"},
-                {"name": "FirstName", "type": "string"},
-                {"name": "LastName", "type": "string"},
-            ]
-        }
-    )
-    detector = Detector(schema_sync=True)
-    resource = TableResource(data=source, schema=schema, detector=detector)
-    report = resource.validate()
-    assert report.valid
-
-
-def test_resource_validate_detector_headers_errors():
-    source = [
-        ["id", "last_name", "first_name", "language"],
-        [1, "Alex", "John", "English"],
-        [2, "Peters", "John", "Afrikaans"],
-        [3, "Smith", "Paul", None],
-    ]
-    schema = Schema.from_descriptor(
-        {
-            "fields": [
-                {"name": "id", "type": "number"},
-                {"name": "language", "type": "string", "constraints": {"required": True}},
-                {"name": "country", "type": "string"},
-            ]
-        }
-    )
-    detector = Detector(schema_sync=True)
-    resource = TableResource(data=source, schema=schema, detector=detector)
-    report = resource.validate()
-    assert report.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == [
-        [4, 4, "constraint-error", ["3", "Smith", "Paul", ""]],
-    ]
-
-
-def test_resource_validate_detector_patch_schema():
-    detector = Detector(schema_patch={"missingValues": ["-"]})
-    resource = TableResource(path="data/table.csv", detector=detector)
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.to_descriptor() == {
-        "fields": [
-            {"name": "id", "type": "integer"},
-            {"name": "name", "type": "string"},
-        ],
-        "missingValues": ["-"],
-    }
-
-
-def test_resource_validate_detector_patch_schema_fields():
-    detector = Detector(
-        schema_patch={"fields": {"id": {"type": "string"}}, "missingValues": ["-"]}
-    )
-    resource = TableResource(path="data/table.csv", detector=detector)
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.to_descriptor() == {
-        "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}],
-        "missingValues": ["-"],
-    }
-
-
-def test_resource_validate_detector_infer_type_string():
-    detector = Detector(field_type="string")
-    resource = TableResource(path="data/table.csv", detector=detector)
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.to_descriptor() == {
-        "fields": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}],
-    }
-
-
-def test_resource_validate_detector_infer_type_any():
-    detector = Detector(field_type="any")
-    resource = TableResource(path="data/table.csv", detector=detector)
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.to_descriptor() == {
-        "fields": [{"name": "id", "type": "any"}, {"name": "name", "type": "any"}],
-    }
-
-
-def test_resource_validate_detector_infer_names():
-    dialect = Dialect(header=False)
-    detector = Detector(field_names=["id", "name"])
-    resource = TableResource(
-        path="data/without-headers.csv", dialect=dialect, detector=detector
-    )
-    report = resource.validate()
-    assert report.valid
-    assert resource.schema.fields[0].name == "id"
-    assert resource.schema.fields[1].name == "name"
-    assert resource.stats.rows == 3
-    assert resource.labels == []
-    assert resource.header == ["id", "name"]
diff --git a/frictionless/validator/__spec__/resource/test_encoding.py b/frictionless/validator/__spec__/resource/test_encoding.py
deleted file mode 100644
index 9780c23172..0000000000
--- a/frictionless/validator/__spec__/resource/test_encoding.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pytest
-
-from frictionless import platform
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_encoding():
-    resource = TableResource(path="data/table.csv", encoding="utf-8")
-    report = resource.validate()
-    assert report.valid
-
-
-@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
-def test_resource_validate_encoding_invalid():
-    resource = TableResource(path="data/latin1.csv", encoding="utf-8")
-    report = resource.validate()
-    assert not report.valid
-    assert report.flatten(["type", "note"]) == [
-        [
-            "encoding-error",
-            "'utf-8' codec can't decode byte 0xa9 in position 20: invalid start byte",
-        ],
-    ]
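Several of the deleted detector tests center on schema_sync, so here is the behavior in isolation: with schema_sync=True the provided schema is reordered (and trimmed) to match the incoming header before validation. A minimal sketch with inline illustrative data:

from frictionless import Detector, Schema
from frictionless.resources import TableResource

# The header order ("name", "id") differs from the schema order ("id", "name");
# schema_sync aligns the schema fields to the labels, so validation still passes.
source = [["name", "id"], ["london", 1], ["paris", 2]]
schema = Schema.from_descriptor(
    {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }
)
resource = TableResource(
    data=source, schema=schema, detector=Detector(schema_sync=True)
)
report = resource.validate()
assert report.valid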
diff --git a/frictionless/validator/__spec__/resource/test_file.py b/frictionless/validator/__spec__/resource/test_file.py
deleted file mode 100644
index eadad13769..0000000000
--- a/frictionless/validator/__spec__/resource/test_file.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from frictionless import Resource
-
-# General
-
-
-def test_resource_validate_format_non_tabular():
-    resource = Resource("data/table.bad")
-    report = resource.validate()
-    assert report.valid
-
-
-def test_resource_validate_invalid_resource_standards_v2_strict():
-    report = Resource.validate_descriptor({"path": "data/table.csv"})
-    assert report.flatten(["type", "note"]) == [
-        ["resource-error", "'name' is a required property"],
-    ]
diff --git a/frictionless/validator/__spec__/resource/test_format.py b/frictionless/validator/__spec__/resource/test_format.py
deleted file mode 100644
index 002df585d1..0000000000
--- a/frictionless/validator/__spec__/resource/test_format.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_format():
-    resource = TableResource(path="data/table.csv", format="csv")
-    report = resource.validate()
-    assert report.valid
diff --git a/frictionless/validator/__spec__/resource/test_scheme.py b/frictionless/validator/__spec__/resource/test_scheme.py
deleted file mode 100644
index fdcca6c8e8..0000000000
--- a/frictionless/validator/__spec__/resource/test_scheme.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_scheme():
-    resource = TableResource(path="data/table.csv", scheme="file")
-    report = resource.validate()
-    assert report.valid
-
-
-def test_resource_validate_scheme_invalid():
-    resource = TableResource(path="bad://data/table.csv")
-    report = resource.validate()
-    assert report.flatten(["type", "note"]) == [
-        ["scheme-error", 'scheme "bad" is not supported'],
-    ]
diff --git a/frictionless/validator/__spec__/resource/test_stats.py b/frictionless/validator/__spec__/resource/test_stats.py
deleted file mode 100644
index ab19b9239c..0000000000
--- a/frictionless/validator/__spec__/resource/test_stats.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from frictionless.resources import TableResource
-
-# General
-
-
-def test_resource_validate_stats_hash():
-    hash = "sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8"
-    resource = TableResource(path="data/table.csv", hash=hash)
-    report = resource.validate()
-    assert report.task.valid
-
-
-def test_resource_validate_stats_hash_invalid():
-    hash = "6c2c61dd9b0e9c6876139a449ed87933"
-    resource = TableResource(path="data/table.csv", hash="bad")
-    report = resource.validate()
-    assert report.flatten(["type", "note"]) == [
-        [
-            "hash-count",
-            'expected is "bad" and actual is "%s"' % hash,
-        ],
-    ]
-
-
-def test_resource_validate_stats_bytes():
-    resource = TableResource(path="data/table.csv", bytes=30)
-    report = resource.validate()
-    assert report.task.valid
-
-
-def test_resource_validate_stats_bytes_invalid():
-    resource = TableResource(path="data/table.csv", bytes=40)
-    report = resource.validate()
-    assert report.task.error.to_descriptor().get("rowNumber") is None
-    assert report.task.error.to_descriptor().get("fieldNumber") is None
-    assert report.flatten(["type", "note"]) == [
-        ["byte-count", 'expected is "40" and actual is "30"'],
-    ]
-
-
-def test_resource_validate_stats_rows():
-    resource = TableResource(path="data/table.csv", rows=2)
-    report = resource.validate()
-    assert report.task.valid
-
-
-def test_resource_validate_stats_rows_invalid():
-    resource = TableResource(path="data/table.csv", rows=3)
-    report = resource.validate()
-    assert report.task.error.to_descriptor().get("rowNumber") is None
-    assert report.task.error.to_descriptor().get("fieldNumber") is None
-    assert report.flatten(["type", "note"]) == [
-        ["row-count", 'expected is "3" and actual is "2"'],
-    ]
-
-
-def test_resource_validate_stats_not_supported_hash_algorithm():
-    resource = TableResource.from_descriptor(
-        {
-            "name": "name",
-            "path": "data/table.csv",
-            "hash": "sha1:db6ea2f8ff72a9e13e1d70c28ed1c6b42af3bb0e",
-        }
-    )
-    report = resource.validate()
-    assert report.task.warnings == ["hash is ignored; supported algorithms: md5/sha256"]
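All of the stats tests above follow one pattern: stats declared on the resource (hash, bytes, rows) are compared against computed values during validation, and mismatches surface as hash-count, byte-count, or row-count report errors rather than exceptions. A minimal sketch, reusing the fixture values from the tests (bytes=30 and rows=2 are the actual values for data/table.csv in the suite):

from frictionless.resources import TableResource

# Matching declared stats validate cleanly
resource = TableResource(path="data/table.csv", bytes=30, rows=2)
report = resource.validate()
assert report.task.valid

# A wrong declaration yields a report error (byte-count here), not an exception
bad = TableResource(path="data/table.csv", bytes=40)
assert not bad.validate().valid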
+ """ + package.validate(*args, **kwargs) # Resource - def validate_resource( - self, - resource: Resource, - *, - checklist: Optional[Checklist] = None, - limit_errors: int = settings.DEFAULT_LIMIT_ERRORS, - limit_rows: Optional[int] = None, - on_row: Optional[types.ICallbackFunction] = None, - ): - # Create state - partial = False - timer = helpers.Timer() - labels: List[str] = [] - errors: List[Error] = [] - warnings: List[str] = [] - - # Prepare checklist - checklist = checklist or Checklist() - checks = checklist.connect(resource) - - # Validate metadata - try: - resource.to_descriptor(validate=True) - except FrictionlessException as exception: - return Report.from_validation_task( - resource, time=timer.time, errors=exception.to_errors() - ) - - # TODO: remove in next version - # Ignore not-supported hashings - if resource.hash: - algorithm, _ = helpers.parse_resource_hash_v1(resource.hash) - if algorithm not in ["md5", "sha256"]: - warning = "hash is ignored; supported algorithms: md5/sha256" - warnings.append(warning) - - # Prepare resource - if resource.closed: - try: - resource.open() - except FrictionlessException as exception: - resource.close() - return Report.from_validation_task( - resource, time=timer.time, errors=exception.to_errors() - ) - - # Validate data - with resource: - # Validate start - for index, check in enumerate(checks): - for error in check.validate_start(): - if error.type == "check-error": - del checks[index] - if checklist.match(error): - errors.append(error) - - # Validate file - if not isinstance(resource, platform.frictionless_resources.TableResource): - if resource.hash is not None or resource.bytes is not None: - helpers.pass_through(resource.byte_stream) - - # Validate table - else: - row_count = 0 - labels = resource.labels - while True: - row_count += 1 - - # Emit row - try: - row = next(resource.row_stream) # type: ignore - except FrictionlessException as exception: - errors.append(exception.error) - continue - except StopIteration: - break - - # Validate row - for check in checks: - for error in check.validate_row(row): - if checklist.match(error): - errors.append(error) - - # Callback row - if on_row: - on_row(row) - - # Limit rows - if limit_rows: - if row_count >= limit_rows: - warning = f"reached row limit: {limit_rows}" - warnings.append(warning) - partial = True - break - - # Limit errors - if limit_errors: - if len(errors) >= limit_errors: - errors = errors[:limit_errors] - warning = f"reached error limit: {limit_errors}" - warnings.append(warning) - partial = True - break - - # Validate end - if not partial: - for check in checks: - for error in check.validate_end(): - if checklist.match(error): - errors.append(error) - - # Return report - return Report.from_validation_task( - resource, time=timer.time, labels=labels, errors=errors, warnings=warnings - ) - - -# Internal - + def validate_resource(self, resource: Resource, *args: Any, **kwargs: Any): + """ + Validator.validate_resource is deprecated, use and see Resource.validate + instead. -def validate_parallel(options: types.IDescriptor) -> types.IDescriptor: - resource_options = options["resource"] - validate_options = options["validate"] - resource = Resource.from_descriptor(**resource_options) - report = resource.validate(**validate_options) - return report.to_descriptor() + There is no plan to remove this method in future versions. + """ + resource.validate(*args, **kwargs)