Skip to content

Commit a39d143

Browse files
authored
fix: handling of Literal datatype (RDFLib#2076)
Check datatype against `None` instead of checking its truthiness (i.e. `if datatype is not None:` instead of `if datatype:`). Checking truthiness instead of `is not None` causes a blank string to be treated the same as `None`. The consequence of this was that `Literal.datatype` could be a `str`, a `URIRef` or `None`, instead of just a `URIRef` or `None` as was seemingly intended. Other changes: - Changed the type of `Literal.datatype` to be `Optional[URIRef]` instead of `Optional[str]` now that `str` will always be converted to `URIRef` even if it is a blank string. - Changed `rdflib.util._coalesce` to make it easier and safer to use with a non-`None` default value. - Changed `rdflib.util` to avoid issues with circular imports.
1 parent a4b9305 commit a39d143

File tree

9 files changed

+250
-76
lines changed

9 files changed

+250
-76
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ RDFLib.sublime-project
22
/docs/_build/
33
RDFLib.sublime-workspace
44
coverage/
5+
cov.xml
56
/.hgtags
67
/.hgignore
78
build/

CHANGELOG.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,25 @@ and will be removed for release.
248248
<!-- -->
249249
<!-- -->
250250

251+
252+
<!-- -->
253+
<!-- -->
254+
<!-- CHANGE BARRIER: START PR #2076 -->
255+
<!-- -->
256+
<!-- -->
257+
258+
- Fixed handling of `Literal` `datatype` to correctly differentiate between
259+
blank string values and undefined values; also changed the type of
260+
`rdflib.term.Literal.datatype` from `Optional[str]` to `Optional[URIRef]` now
261+
that all non-`URIRef` `str` values will be converted to `URIRef`.
262+
[PR #2076](https://github.com/RDFLib/rdflib/pull/2076).
263+
264+
<!-- -->
265+
<!-- -->
266+
<!-- CHANGE BARRIER: END PR #2076 -->
267+
<!-- -->
268+
<!-- -->
269+
251270
<!-- -->
252271
<!-- -->
253272
<!-- CHANGE BARRIER: START -->

Taskfile.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ tasks:
148148
mypy:
149149
desc: Run mypy
150150
cmds:
151-
- "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes"
151+
- "{{._PYTHON | shellQuote}} -m mypy --show-error-context --show-error-codes {{.CLI_ARGS}}"
152152

153153
lint:fix:
154154
desc: Fix auto-fixable linting errors

rdflib/plugins/stores/memory.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,12 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None:
217217
self.__namespace[prefix] = namespace
218218
else:
219219
# type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
220-
# type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str")
221-
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment]
222-
bound_prefix, prefix
220+
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index]
221+
bound_prefix, default=prefix
223222
)
224223
# type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
225-
# type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef")
226-
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment]
227-
bound_namespace, namespace
224+
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index]
225+
bound_namespace, default=namespace
228226
)
229227

230228
def namespace(self, prefix: str) -> Optional["URIRef"]:
@@ -538,14 +536,13 @@ def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None:
538536
self.__namespace[prefix] = namespace
539537
else:
540538
# type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef"
541-
# type error: Incompatible types in assignment (expression has type "Optional[str]", target has type "str")
542-
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index, assignment]
543-
bound_prefix, prefix
539+
self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index]
540+
bound_prefix, default=prefix
544541
)
545542
# type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str"
546543
# type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef")
547-
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index, assignment]
548-
bound_namespace, namespace
544+
self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index]
545+
bound_namespace, default=namespace
549546
)
550547

551548
def namespace(self, prefix: str) -> Optional["URIRef"]:

rdflib/term.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
)
7070

7171
import rdflib
72+
import rdflib.util
7273
from rdflib.compat import long_type
7374

7475
if TYPE_CHECKING:
@@ -598,7 +599,7 @@ class Literal(Identifier):
598599
_value: Any
599600
_language: Optional[str]
600601
# NOTE: _datatype should maybe be of type URIRef, and not optional.
601-
_datatype: Optional[str]
602+
_datatype: Optional[URIRef]
602603
_ill_typed: Optional[bool]
603604
__slots__ = ("_language", "_datatype", "_value", "_ill_typed")
604605

@@ -624,7 +625,7 @@ def __new__(
624625
if lang is not None and not _is_valid_langtag(lang):
625626
raise ValueError(f"'{str(lang)}' is not a valid language tag!")
626627

627-
if datatype:
628+
if datatype is not None:
628629
datatype = URIRef(datatype)
629630

630631
value = None
@@ -633,7 +634,7 @@ def __new__(
633634
# create from another Literal instance
634635

635636
lang = lang or lexical_or_value.language
636-
if datatype:
637+
if datatype is not None:
637638
# override datatype
638639
value = _castLexicalToPython(lexical_or_value, datatype)
639640
else:
@@ -644,7 +645,7 @@ def __new__(
644645
# passed a string
645646
# try parsing lexical form of datatyped literal
646647
value = _castLexicalToPython(lexical_or_value, datatype)
647-
if datatype and datatype in _toPythonMapping:
648+
if datatype is not None and datatype in _toPythonMapping:
648649
# datatype is a recognized datatype IRI:
649650
# https://www.w3.org/TR/rdf11-concepts/#dfn-recognized-datatype-iris
650651
dt_uri: URIRef = URIRef(datatype)
@@ -661,10 +662,12 @@ def __new__(
661662
value = lexical_or_value
662663
_value, _datatype = _castPythonToLiteral(lexical_or_value, datatype)
663664

664-
datatype = datatype or _datatype
665+
_datatype = None if _datatype is None else URIRef(_datatype)
666+
667+
datatype = rdflib.util._coalesce(datatype, _datatype)
665668
if _value is not None:
666669
lexical_or_value = _value
667-
if datatype:
670+
if datatype is not None:
668671
lang = None
669672

670673
if isinstance(lexical_or_value, bytes):
@@ -729,7 +732,7 @@ def language(self) -> Optional[str]:
729732
return self._language
730733

731734
@property
732-
def datatype(self) -> Optional[str]:
735+
def datatype(self) -> Optional[URIRef]:
733736
return self._datatype
734737

735738
def __reduce__(
@@ -743,7 +746,7 @@ def __reduce__(
743746
def __getstate__(self) -> Tuple[None, Dict[str, Union[str, None]]]:
744747
return (None, dict(language=self.language, datatype=self.datatype))
745748

746-
def __setstate__(self, arg: Tuple[Any, Dict[str, str]]) -> None:
749+
def __setstate__(self, arg: Tuple[Any, Dict[str, Any]]) -> None:
747750
_, d = arg
748751
self._language = d["language"]
749752
self._datatype = d["datatype"]
@@ -1096,8 +1099,8 @@ def __gt__(self, other: Any) -> bool:
10961099

10971100
# plain-literals and xsd:string literals
10981101
# are "the same"
1099-
dtself = self.datatype or _XSD_STRING
1100-
dtother = other.datatype or _XSD_STRING
1102+
dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING)
1103+
dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING)
11011104

11021105
if dtself != dtother:
11031106
if rdflib.DAWG_LITERAL_COLLATION:
@@ -1129,9 +1132,9 @@ def __gt__(self, other: Any) -> bool:
11291132
# same language, same lexical form, check real dt
11301133
# plain-literals come before xsd:string!
11311134
if self.datatype != other.datatype:
1132-
if not self.datatype:
1135+
if self.datatype is None:
11331136
return False
1134-
elif not other.datatype:
1137+
elif other.datatype is None:
11351138
return True
11361139
else:
11371140
return self.datatype > other.datatype
@@ -1186,7 +1189,7 @@ def _comparable_to(self, other: Any) -> bool:
11861189
rich-compare with this literal
11871190
"""
11881191
if isinstance(other, Literal):
1189-
if self.datatype and other.datatype:
1192+
if self.datatype is not None and other.datatype is not None:
11901193
# two datatyped literals
11911194
if (
11921195
self.datatype not in XSDToPython
@@ -1247,7 +1250,7 @@ def __hash__(self) -> int: # type: ignore[override]
12471250
# Directly accessing the member is faster than the property.
12481251
if self._language:
12491252
res ^= hash(self._language.lower())
1250-
if self._datatype:
1253+
if self._datatype is not None:
12511254
res ^= hash(self._datatype)
12521255
return res
12531256

@@ -1342,8 +1345,8 @@ def eq(self, other: Any) -> bool:
13421345
if (self.language or "").lower() != (other.language or "").lower():
13431346
return False
13441347

1345-
dtself = self.datatype or _XSD_STRING
1346-
dtother = other.datatype or _XSD_STRING
1348+
dtself = rdflib.util._coalesce(self.datatype, default=_XSD_STRING)
1349+
dtother = rdflib.util._coalesce(other.datatype, default=_XSD_STRING)
13471350

13481351
if dtself == _XSD_STRING and dtother == _XSD_STRING:
13491352
# string/plain literals, compare on lexical form
@@ -1556,7 +1559,7 @@ def _literal_n3(
15561559

15571560
datatype = self.datatype
15581561
quoted_dt = None
1559-
if datatype:
1562+
if datatype is not None:
15601563
if qname_callback:
15611564
quoted_dt = qname_callback(datatype)
15621565
if not quoted_dt:
@@ -1906,16 +1909,18 @@ def _well_formed_negative_integer(lexical: Union[str, bytes], value: Any) -> boo
19061909
URIRef(_XSD_PFX + "token"),
19071910
)
19081911

1912+
_StrT = TypeVar("_StrT", bound=str)
1913+
19091914

19101915
def _py2literal(
19111916
obj: Any,
19121917
pType: Any, # noqa: N803
19131918
castFunc: Optional[Callable[[Any], Any]],
1914-
dType: Optional[str],
1915-
) -> Tuple[Any, Optional[str]]:
1916-
if castFunc:
1919+
dType: Optional[_StrT],
1920+
) -> Tuple[Any, Optional[_StrT]]:
1921+
if castFunc is not None:
19171922
return castFunc(obj), dType
1918-
elif dType:
1923+
elif dType is not None:
19191924
return obj, dType
19201925
else:
19211926
return obj, None
@@ -2062,7 +2067,7 @@ def _reset_bindings() -> None:
20622067

20632068

20642069
def _castLexicalToPython( # noqa: N802
2065-
lexical: Union[str, bytes], datatype: Optional[str]
2070+
lexical: Union[str, bytes], datatype: Optional[URIRef]
20662071
) -> Any:
20672072
"""
20682073
Map a lexical form to the value-space for the given datatype

0 commit comments

Comments
 (0)