-
Notifications
You must be signed in to change notification settings - Fork 24
SHACL Validation #767
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SHACL Validation #767
Changes from 28 commits
d5be7fe
f8625b3
6a83b7b
7a08904
9a34f7f
d1243f3
e3bc017
52c5bcc
db33fc0
39b2e9a
b9535a0
5339b6d
5624fdd
f0f20d4
b8ddbd5
8370636
c827f3a
bcb441b
a760d2f
901c67c
5e72d13
835cff1
fda27ad
b776273
04b07eb
ed81d9f
b299b49
86e0456
328f5b5
e868ba0
c0468b4
cd13981
35296da
7537693
3a4a74d
877113e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| # -*- coding: utf-8 -*- | ||
| # | ||
| # Copyright 2019 - Swiss Data Science Center (SDSC) | ||
| # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and | ||
| # Eidgenössische Technische Hochschule Zürich (ETHZ). | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| """Check KG structure using SHACL.""" | ||
| import yaml | ||
| from rdflib.namespace import Namespace | ||
| from rdflib.term import BNode | ||
|
|
||
| from renku.core.commands.echo import WARNING | ||
| from renku.core.compat import pyld | ||
| from renku.core.models.jsonld import NoDatesSafeLoader | ||
| from renku.core.utils.shacl import validate_graph | ||
|
|
||
|
|
||
def _shacl_graph_to_string(graph):
    """Render the results of a SHACL validation graph as readable text.

    Every object of an ``sh:result`` triple in ``graph`` yields one entry.
    If the result carries a truthy ``sh:resultMessage``, the entry is that
    message prefixed with the ``sh:resultPath``. Otherwise the entry is
    assembled from the ``sh:sourceConstraintComponent`` and the
    ``sh:focusNode`` of the result, with blank focus nodes displayed as
    ``<Anonymous>``.

    :param graph: graph holding the validation results; only its
        ``subject_objects`` and ``value`` methods are used here.
    :returns: all entries joined by a newline followed by a tab.
    """
    shacl = Namespace('http://www.w3.org/ns/shacl#')

    def describe(result):
        """Build the text for a single validation result node."""
        result_path = graph.value(result, shacl.resultPath)
        text = graph.value(result, shacl.resultMessage)

        if text:
            return '{}: {}'.format(result_path, text)

        constraint = graph.value(result, shacl.sourceConstraintComponent)
        focus = graph.value(result, shacl.focusNode)
        # A blank node is reported with a fixed placeholder instead of its
        # identifier.
        node_id = '<Anonymous>' if isinstance(focus, BNode) else focus

        return '{}: Type: {}, Node ID: {}'.format(
            result_path, constraint, node_id
        )

    return '\n\t'.join(
        describe(result) for _, result in graph.subject_objects(shacl.result)
    )
|
|
||
|
|
||
def check_project_structure(client):
    """Check the project metadata file against the SHACL schema.

    :param client: object exposing ``renku_metadata_path``, the path of the
        project metadata file to validate.
    :returns: ``(True, None)`` when the metadata conforms, otherwise
        ``(False, message)`` where ``message`` is a warning listing the
        validation problems.
    """
    is_valid, report, _ = check_shacl_structure(client.renku_metadata_path)

    if not is_valid:
        header = WARNING + 'Invalid structure of project metadata\n\t'
        return False, header + _shacl_graph_to_string(report)

    return True, None
|
|
||
|
|
||
def check_datasets_structure(client):
    """Validate the metadata file of every dataset against SHACL.

    Each file matching ``client.METADATA`` below
    ``client.renku_datasets_path`` is validated on its own. A file that does
    not conform, or that cannot be validated at all, is recorded in the
    returned message and makes the whole check fail.

    :param client: object exposing ``renku_datasets_path`` (a path that
        supports ``rglob``) and ``METADATA`` (the file name pattern to
        search for).
    :returns: ``(True, None)`` when every metadata file conforms, otherwise
        ``(False, message)`` where ``message`` is a warning describing each
        problematic file.
    """
    ok = True

    problems = [WARNING + 'Invalid structure of dataset metadata\n']

    for path in client.renku_datasets_path.rglob(client.METADATA):
        try:
            conform, graph, _ = check_shacl_structure(path)
        except Exception as e:
            # Only ``Exception`` is caught so that KeyboardInterrupt and
            # SystemExit still propagate. A file that cannot be validated
            # is a problem in its own right: mark the check as failed so
            # the message is actually reported instead of being discarded.
            ok = False
            problems.append(
                'Couldn\'t validate {}: {}\n\n'.format(path, e)
            )
            continue

        if conform:
            continue

        ok = False

        problems.append(
            str(path) + '\n\t' + _shacl_graph_to_string(graph) + '\n\n'
        )

    if ok:
        return True, None

    return False, ''.join(problems)
|
|
||
|
|
||
def check_shacl_structure(path):
    """Validate the metadata file at ``path`` against the SHACL schema.

    The file is parsed as YAML with ``NoDatesSafeLoader``; a falsy parse
    result is replaced by an empty dict. The document is then converted to
    N-Quads with pyld and handed to ``validate_graph``.

    :param path: path-like object providing ``open(mode=...)``.
    :returns: the result of ``validate_graph``; callers in this module
        unpack it as three values, the first being the conformance flag
        and the second the result graph.
    """
    with path.open(mode='r') as metadata_file:
        document = yaml.load(metadata_file, Loader=NoDatesSafeLoader)

    if not document:
        document = {}

    conversion_options = {
        'format': 'application/n-quads',
        'produceGeneralizedRdf': True
    }
    nquads = pyld.jsonld.to_rdf(document, options=conversion_options)

    return validate_graph(nquads)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,10 +18,13 @@ | |
| """Compatibility layer for different Python versions.""" | ||
|
|
||
| import contextlib | ||
| import json | ||
| import os | ||
| import sys | ||
| from pathlib import Path | ||
|
|
||
| import pyld | ||
|
|
||
| if sys.version_info < (3, 6): | ||
| original_resolve = Path.resolve | ||
|
|
||
|
|
@@ -63,4 +66,25 @@ def __exit__(self, *excinfo): | |
| except NameError: # pragma: no cover | ||
| FileNotFoundError = IOError | ||
|
|
||
| __all__ = ('FileNotFoundError', 'Path', 'contextlib') | ||
|
|
||
| class PatchedActiveContextCache(pyld.jsonld.ActiveContextCache): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we send a PR for this to the pyld maintainers? 🤔
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is one, from 2015, with no comments or reactions from them: digitalbazaar/pyld#44. I added my solution as a comment there (I think my solution is better, but opening another PR when this one was ignored probably isn't worth the effort), but it looks like they're not interested in it. |
||
| """Pyld context cache without issue of missing contexts.""" | ||
|
|
||
def set(self, active_ctx, local_ctx, result):
    """Cache ``result`` for the given pair of contexts.

    Both contexts are serialised with ``json.dumps`` and used as the
    two-level cache key. When the order queue has reached ``self.size``,
    the oldest queue entry is dropped first; its cached value is removed
    only if no remaining queue entry refers to the same key pair.

    :param active_ctx: JSON-serialisable active context.
    :param local_ctx: JSON-serialisable local context.
    :param result: JSON-serialisable value to cache.
    """
    if len(self.order) == self.size:
        oldest = self.order.popleft()
        old_active = oldest['activeCtx']
        old_local = oldest['localCtx']
        still_queued = any(
            queued['activeCtx'] == old_active and
            queued['localCtx'] == old_local
            for queued in self.order
        )
        if not still_queued:
            # only delete from cache if it doesn't exist in context deque
            del self.cache[old_active][old_local]

    active_key = json.dumps(active_ctx)
    local_key = json.dumps(local_ctx)
    self.order.append({'activeCtx': active_key, 'localCtx': local_key})
    # The JSON round trip stores a copy that is independent of ``result``.
    stored = json.loads(json.dumps(result))
    self.cache.setdefault(active_key, {})[local_key] = stored
|
|
||
|
|
||
| pyld.jsonld._cache = {'activeCtx': PatchedActiveContextCache()} | ||
|
|
||
| __all__ = ('FileNotFoundError', 'Path', 'contextlib', 'pyld') | ||
Uh oh!
There was an error while loading. Please reload this page.