-
Notifications
You must be signed in to change notification settings - Fork 50
Add tests for latest purl-spec #202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4c237ce
f1fecb3
10b4df9
5f0924c
265754c
b8a6d84
83c1a6a
b3ca8b3
61e4bd5
47dd757
d563792
eafd33a
3b9c6df
48dee0d
dc77d35
2efa591
c127000
3a58ba5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[submodule "spec"] | ||
path = spec | ||
url = https://github.com/package-url/purl-spec.git |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -29,6 +29,7 @@ | |||||||
from collections.abc import Mapping | ||||||||
from typing import TYPE_CHECKING | ||||||||
from typing import Any | ||||||||
from typing import Optional | ||||||||
from typing import Union | ||||||||
from typing import overload | ||||||||
from urllib.parse import quote as _percent_quote | ||||||||
|
@@ -116,15 +117,18 @@ def normalize_namespace( | |||||||
|
||||||||
namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8") | ||||||||
namespace_str = namespace_str.strip().strip("/") | ||||||||
if ptype in ("bitbucket", "github", "pypi", "gitlab"): | ||||||||
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): | ||||||||
namespace_str = namespace_str.lower() | ||||||||
segments = [seg for seg in namespace_str.split("/") if seg.strip()] | ||||||||
segments_quoted = map(get_quoter(encode), segments) | ||||||||
return "/".join(segments_quoted) or None | ||||||||
|
||||||||
|
||||||||
def normalize_name( | ||||||||
name: AnyStr | None, ptype: str | None, encode: bool | None = True | ||||||||
name: AnyStr | None, | ||||||||
qualifiers: Union[Union[str, bytes], dict[str, str], None], | ||||||||
ptype: str | None, | ||||||||
encode: bool | None = True, | ||||||||
) -> str | None: | ||||||||
if not name: | ||||||||
return None | ||||||||
|
@@ -133,20 +137,38 @@ def normalize_name( | |||||||
quoter = get_quoter(encode) | ||||||||
name_str = quoter(name_str) | ||||||||
name_str = name_str.strip().strip("/") | ||||||||
if ptype in ("bitbucket", "github", "pypi", "gitlab"): | ||||||||
if ptype and ptype in ("mlflow"): | ||||||||
# MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL | ||||||||
# For Databricks, it is case insensitive and must be lowercased in the package URL | ||||||||
if isinstance(qualifiers, dict): | ||||||||
repo_url = qualifiers.get("repository_url") | ||||||||
if repo_url and "azureml" in repo_url.lower(): | ||||||||
return name_str | ||||||||
if repo_url and "databricks" in repo_url.lower(): | ||||||||
return name_str.lower() | ||||||||
if isinstance(qualifiers, str): | ||||||||
if "azureml" in qualifiers.lower(): | ||||||||
return name_str | ||||||||
if "databricks" in qualifiers.lower(): | ||||||||
return name_str.lower() | ||||||||
Comment on lines
+141
to
+153
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should abstract this out into a separate function. |
||||||||
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"): | ||||||||
name_str = name_str.lower() | ||||||||
if ptype == "pypi": | ||||||||
name_str = name_str.replace("_", "-") | ||||||||
return name_str or None | ||||||||
|
||||||||
|
||||||||
def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str | None: | ||||||||
def normalize_version( | ||||||||
version: AnyStr | None, ptype: Optional[Union[str, bytes]], encode: bool | None = True | ||||||||
) -> str | None: | ||||||||
if not version: | ||||||||
return None | ||||||||
|
||||||||
version_str = version if isinstance(version, str) else version.decode("utf-8") | ||||||||
quoter = get_quoter(encode) | ||||||||
version_str = quoter(version_str.strip()) | ||||||||
if ptype and isinstance(ptype, str) and ptype in ("huggingface"): | ||||||||
return version_str.lower() | ||||||||
return version_str or None | ||||||||
|
||||||||
|
||||||||
|
@@ -304,8 +326,8 @@ def normalize( | |||||||
""" | ||||||||
type_norm = normalize_type(type, encode) | ||||||||
namespace_norm = normalize_namespace(namespace, type_norm, encode) | ||||||||
name_norm = normalize_name(name, type_norm, encode) | ||||||||
version_norm = normalize_version(version, encode) | ||||||||
name_norm = normalize_name(name, qualifiers, type_norm, encode) | ||||||||
version_norm = normalize_version(version, type, encode) | ||||||||
qualifiers_norm = normalize_qualifiers(qualifiers, encode) | ||||||||
subpath_norm = normalize_subpath(subpath, encode) | ||||||||
return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm | ||||||||
|
@@ -464,8 +486,21 @@ def from_string(cls, purl: str) -> Self: | |||||||
if not type_ or not sep: | ||||||||
raise ValueError(f"purl is missing the required type component: {purl!r}.") | ||||||||
|
||||||||
if not all(c in string.ascii_letters + string.digits + "-._" for c in type_): | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
raise ValueError( | ||||||||
f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}." | ||||||||
) | ||||||||
|
||||||||
if ":" in type_: | ||||||||
raise ValueError(f"purl type cannot contain a colon: {type_!r}.") | ||||||||
Comment on lines
+494
to
+495
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This part is not reachable because |
||||||||
|
||||||||
if type_[0] in string.digits: | ||||||||
raise ValueError(f"purl type cannot start with a number: {type_!r}.") | ||||||||
|
||||||||
type_ = type_.lower() | ||||||||
|
||||||||
original_remainder = remainder | ||||||||
|
||||||||
scheme, authority, path, qualifiers_str, subpath = _urlsplit( | ||||||||
url=remainder, scheme="", allow_fragments=True | ||||||||
) | ||||||||
|
@@ -480,7 +515,9 @@ def from_string(cls, purl: str) -> Self: | |||||||
path = authority + ":" + path | ||||||||
|
||||||||
if scheme: | ||||||||
path = scheme + ":" + path | ||||||||
# This is a way to preserve the casing of the original scheme | ||||||||
original_scheme = original_remainder.split(":", 1)[0] | ||||||||
path = original_scheme + ":" + path | ||||||||
|
||||||||
path = path.lstrip("/") | ||||||||
|
||||||||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,160 @@ | ||||
# Copyright (c) the purl authors | ||||
# SPDX-License-Identifier: MIT | ||||
# | ||||
# Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
# of this software and associated documentation files (the "Software"), to deal | ||||
# in the Software without restriction, including without limitation the rights | ||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
# copies of the Software, and to permit persons to whom the Software is | ||||
# furnished to do so, subject to the following conditions: | ||||
# | ||||
# The above copyright notice and this permission notice shall be included in all | ||||
# copies or substantial portions of the Software. | ||||
# | ||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
# SOFTWARE. | ||||
|
||||
# Visit https://github.com/package-url/packageurl-python for support and | ||||
# download. | ||||
|
||||
import json | ||||
import os | ||||
|
||||
import pytest | ||||
|
||||
from packageurl import PackageURL | ||||
|
||||
current_dir = os.path.dirname(__file__) | ||||
root_dir = os.path.abspath(os.path.join(current_dir, "..")) | ||||
spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") | ||||
|
||||
valid_purl_types_file = os.path.join(root_dir, "spec", "purl-types-index.json") | ||||
|
||||
|
||||
with open(spec_file_path, "r", encoding="utf-8") as f: | ||||
test_cases = json.load(f) | ||||
|
||||
with open(valid_purl_types_file, "r", encoding="utf-8") as f: | ||||
valid_purl_types = json.load(f) | ||||
Comment on lines
+42
to
+43
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we using |
||||
|
||||
tests = test_cases["tests"] | ||||
|
||||
parse_tests = [t for t in tests if t["test_type"] == "parse"] | ||||
build_tests = [t for t in tests if t["test_type"] == "build"] | ||||
|
||||
|
||||
@pytest.mark.parametrize( | ||||
"description, input_str, expected_output, expected_failure", | ||||
[ | ||||
(t["description"], t["input"], t["expected_output"], t["expected_failure"]) | ||||
for t in parse_tests | ||||
], | ||||
) | ||||
def test_parse(description, input_str, expected_output, expected_failure): | ||||
if expected_failure: | ||||
with pytest.raises(Exception): | ||||
PackageURL.from_string(input_str) | ||||
# assert None ==PackageURL.from_string(input_str) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
else: | ||||
result = PackageURL.from_string(input_str) | ||||
assert result.to_string() == expected_output | ||||
|
||||
|
||||
@pytest.mark.parametrize( | ||||
"description, input_dict, expected_output, expected_failure", | ||||
[ | ||||
(t["description"], t["input"], t["expected_output"], t["expected_failure"]) | ||||
for t in build_tests | ||||
], | ||||
) | ||||
def test_build(description, input_dict, expected_output, expected_failure): | ||||
kwargs = { | ||||
"type": input_dict.get("type"), | ||||
"namespace": input_dict.get("namespace"), | ||||
"name": input_dict.get("name"), | ||||
"version": input_dict.get("version"), | ||||
"qualifiers": input_dict.get("qualifiers"), | ||||
"subpath": input_dict.get("subpath"), | ||||
} | ||||
|
||||
if expected_failure: | ||||
with pytest.raises(Exception): | ||||
PackageURL(**kwargs).to_string() | ||||
else: | ||||
purl = PackageURL(**kwargs) | ||||
assert purl.to_string() == expected_output | ||||
|
||||
|
||||
def load_spec_files(spec_dir): | ||||
""" | ||||
Load all JSON files from the given directory into a dictionary. | ||||
Key = filename, Value = parsed JSON content | ||||
""" | ||||
spec_data = {} | ||||
for filename in os.listdir(spec_dir): | ||||
if filename.endswith("-test.json"): | ||||
filepath = os.path.join(spec_dir, filename) | ||||
with open(filepath, "r", encoding="utf-8") as f: | ||||
try: | ||||
data = json.load(f) | ||||
spec_data[filename] = data["tests"] | ||||
except json.JSONDecodeError as e: | ||||
print(f"Error parsing {filename}: {e}") | ||||
return spec_data | ||||
|
||||
|
||||
SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") | ||||
spec_dict = load_spec_files(SPEC_DIR) | ||||
|
||||
flattened_cases = [] | ||||
for filename, cases in spec_dict.items(): | ||||
for case in cases: | ||||
flattened_cases.append((filename, case["description"], case)) | ||||
Comment on lines
+111
to
+117
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's move this to the top. |
||||
|
||||
|
||||
@pytest.mark.parametrize("filename,description,test_case", flattened_cases) | ||||
def test_package_type_case(filename, description, test_case): | ||||
test_type = test_case["test_type"] | ||||
expected_failure = test_case.get("expected_failure", False) | ||||
|
||||
if expected_failure: | ||||
with pytest.raises(Exception): | ||||
run_test_case(test_case, test_type, description) | ||||
else: | ||||
run_test_case(test_case, test_type, description) | ||||
|
||||
|
||||
def run_test_case(case, test_type, desc): | ||||
if test_type == "parse": | ||||
purl = PackageURL.from_string(case["input"]) | ||||
expected = case["expected_output"] | ||||
assert purl.type == expected["type"] | ||||
assert purl.namespace == expected["namespace"] | ||||
assert purl.name == expected["name"] | ||||
assert purl.version == expected["version"] | ||||
if expected["qualifiers"]: | ||||
assert purl.qualifiers == expected["qualifiers"] | ||||
else: | ||||
assert not purl.qualifiers | ||||
assert purl.subpath == expected["subpath"] | ||||
|
||||
elif test_type == "roundtrip": | ||||
purl = PackageURL.from_string(case["input"]) | ||||
assert purl.to_string() == case["expected_output"] | ||||
|
||||
elif test_type == "build": | ||||
input_data = case["input"] | ||||
purl = PackageURL( | ||||
type=input_data["type"], | ||||
namespace=input_data["namespace"], | ||||
name=input_data["name"], | ||||
version=input_data["version"], | ||||
qualifiers=input_data.get("qualifiers"), | ||||
subpath=input_data.get("subpath"), | ||||
) | ||||
assert purl.to_string() == case["expected_output"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we're only using purl-spec test data, we should keep it inside
tests/data/
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess we should not keep the complete spec in tests