Skip to content

Commit 958b9a1

Browse files
gjhiggins and aucampia authored
Fix for issue1957 sparql parser percent encoded reserved chars (#1959)
The internal SPARQL parser function [`_hexExpand`](https://github.com/RDFLib/rdflib/blob/6ed2ef48ed38679bcdafe7cae250a2ef4b315e7b/rdflib/plugins/sparql/parser.py#L230) inappropriately expands [percent-encoded reserved characters](https://en.wikipedia.org/wiki/Percent-encoding). It has been removed, as it does not appear to be needed. Merged with only one review, as this is fairly well tested and not a very complicated fix. Co-authored-by: Iwan Aucamp <aucampia@gmail.com>
1 parent 8e24878 commit 958b9a1

File tree

2 files changed

+135
-7
lines changed

2 files changed

+135
-7
lines changed

rdflib/plugins/sparql/parser.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -226,13 +226,6 @@ def expandCollection(terms):
226226
)
227227

228228

229-
def _hexExpand(match):
    """Return the character encoded by a matched percent-escape.

    ``match`` is a regex match object; the first character of its full
    text (the ``%`` sign) is dropped and the remainder is parsed as a
    hexadecimal code point.
    """
    hex_digits = match.group(0)[1:]
    code_point = int(hex_digits, 16)
    return chr(code_point)
231-
232-
233-
# Parse action on PN_LOCAL: every substring of the matched local name that
# matches PERCENT_re is replaced by the result of _hexExpand, i.e. percent
# escapes are decoded to the characters they encode.
PN_LOCAL.setParseAction(lambda x: re.sub("(%s)" % PERCENT_re, _hexExpand, x[0]))
234-
235-
236229
# [141] PNAME_LN ::= PNAME_NS PN_LOCAL
237230
# PN_LOCAL is wrapped in Param under the name "localname"; leaveWhitespace()
# stops pyparsing from skipping whitespace before it, so the local part has
# to follow the namespace prefix directly.
PNAME_LN = PNAME_NS + Param("localname", PN_LOCAL.leaveWhitespace())
238231

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import itertools
2+
import logging
3+
from contextlib import ExitStack
4+
from typing import Type, Union
5+
6+
import pyparsing
7+
import pytest
8+
from pyparsing import Optional
9+
10+
import rdflib
11+
from rdflib import Graph
12+
from rdflib.namespace import Namespace
13+
from rdflib.term import URIRef
14+
15+
# Percent-encoded forms ("%20", "%21", ... "%5D") of the space character and
# the URI reserved characters, in ascending code-point order. Deriving them
# from the literal characters keeps the list readable and typo-free.
RESERVED_PCHARS = [f"%{ord(char):02X}" for char in " !#$%&'()*+,/:;=?@[]"]
37+
38+
39+
@pytest.mark.parametrize("reserved_char_percent_encoded", RESERVED_PCHARS)
def test_sparql_parse_reserved_char_percent_encoded(reserved_char_percent_encoded):
    """
    A percent-encoded reserved character inside a prefixed name is kept
    verbatim by the SPARQL parser, so a query using it still matches data
    loaded from Turtle that uses the same prefixed name.
    """
    # Turtle document whose predicate local name embeds the encoded character.
    turtle_doc = f"""@prefix : <https://www.example.co/reserved/language#> .

<https://www.example.co/reserved/root> :_id "01G39WKRH76BGY5D3SKDHJP2SX" ;
:transcript{reserved_char_percent_encoded}data [ :_id "01G39WKRH7JYRX78X7FG4RCNYF" ;
:_key "transcript{reserved_char_percent_encoded}data" ;
:value "value" ;
:value_id "01G39WKRH7PVK1DXQHWT08DZA8" ] ."""

    # Property path through the same predicate down to the nested :value.
    sparql = f"""PREFIX : <https://www.example.co/reserved/language#>
SELECT ?o
WHERE {{ ?s :transcript{reserved_char_percent_encoded}data/:value ?o . }}"""

    graph = rdflib.Graph()
    graph.parse(data=turtle_doc, format="ttl")
    rows = list(graph.query(sparql))

    # The query only matches if the parser left the escape untouched.
    assert rows[0][0] == rdflib.term.Literal("value")

    # The escape must also still be present in the parsed query itself.
    parsed_query = rdflib.plugins.sparql.parser.parseQuery(sparql)
    assert reserved_char_percent_encoded in str(parsed_query)
65+
66+
67+
# Namespace that test_pnames declares in its PREFIX clause and from which the
# expected IRIs of the parametrized cases are built.
PNAME_PREFIX = Namespace("https://example.com/test_pnames/")
68+
69+
70+
@pytest.fixture(scope="module")
def blank_graph() -> Graph:
    """Provide one empty graph, created once per test module."""
    empty_graph = Graph()
    return empty_graph
73+
74+
75+
@pytest.mark.parametrize(
    ["pname_ns", "pname", "expected_result"],
    itertools.chain(
        [
            ("eg", "invalid/PN_PREFIX", pyparsing.exceptions.ParseException),
            ("", "eg:a", Exception),
            ("", ":invalid PN_LOCAL", pyparsing.exceptions.ParseException),
            ("", ":invalid/PN_LOCAL", pyparsing.exceptions.ParseException),
            ("", ":a:b:c", PNAME_PREFIX["a:b:c"]),
            ("", ":", URIRef(f"{PNAME_PREFIX}")),
            ("", ":a", PNAME_PREFIX.a),
            ("eg", " eg:obj ", PNAME_PREFIX.obj),
            ("", " :obj ", PNAME_PREFIX.obj),
            ("eg", " \t eg:obj \t ", PNAME_PREFIX.obj),
            ("", " \n :obj \n ", PNAME_PREFIX.obj),
            ("eg", "eg:", URIRef(f"{PNAME_PREFIX}")),
            ("eg", "eg:a", PNAME_PREFIX.a),
            ("", ":transcript%20data", PNAME_PREFIX["transcript%20data"]),
        ],
        # One extra case per percent-encoded reserved character: the escape
        # must come through as part of the resulting IRI, unchanged.
        (
            ("", f":aaa{pchar}zzz", PNAME_PREFIX[f"aaa{pchar}zzz"])
            for pchar in RESERVED_PCHARS
        ),
    ),
)
def test_pnames(
    pname_ns: str,
    pname: str,
    expected_result: Union[URIRef, Type[Exception]],
    blank_graph: Graph,
) -> None:
    """
    The given pname produces the expected result.

    :param pname_ns: Prefix label declared in the query's ``PREFIX`` clause
        (empty string for the default prefix).
    :param pname: Prefixed name placed in the object position of the
        ``CONSTRUCT`` template.
    :param expected_result: Either the IRI the pname should resolve to, or
        the exception type that running the query should raise.
    :param blank_graph: Empty graph the query is evaluated against.
    """
    # Spelled as Union[..., None] on purpose: the module-level name
    # ``Optional`` is pyparsing's parser element class, not typing.Optional,
    # so it must not be used as a type annotation here.
    catcher: Union[pytest.ExceptionInfo[Exception], None] = None

    with ExitStack() as xstack:
        # Only expect an exception when the case supplies an exception type.
        if isinstance(expected_result, type) and issubclass(expected_result, Exception):
            catcher = xstack.enter_context(pytest.raises(expected_result))
        query_string = f"""\
PREFIX {pname_ns}: <{PNAME_PREFIX}>

CONSTRUCT {{
<example:_subject> <example:_predicate> {pname}.
}} WHERE {{}}
"""
        query_result = blank_graph.query(query_string)
        assert query_result.type == "CONSTRUCT"
        assert isinstance(query_result.graph, Graph)
        triples = list(query_result.graph.triples((None, None, None)))
        assert len(triples) == 1
        triple = triples[0]
        # The pname under test sits in the object position of the template.
        result = triple[2]
        logging.debug("result = %s", result)

    if catcher is not None:
        assert isinstance(catcher, pytest.ExceptionInfo)
        assert catcher.value is not None
    else:
        assert isinstance(expected_result, URIRef)
        assert expected_result == result

0 commit comments

Comments
 (0)