Skip to content

Add tests for latest purl-spec #202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Setup Python environment
uses: actions/setup-python@v5
Expand All @@ -23,7 +25,7 @@ jobs:
- name: Validate
run: |
isort --check-only src/ tests/
black --check --line-length 100 .
black --check --line-length 100 src/ tests/
mypy

build-and-test:
Expand All @@ -44,6 +46,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive

- name: Setup Python environment
uses: actions/setup-python@v5
Expand All @@ -56,7 +60,7 @@ jobs:
pip install -e .[build]

- name: Test
run: py.test -vvs
run: py.test -vvs --ignore=spec/

- name: Build
run: python setup.py build sdist bdist_wheel
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "spec"]
path = spec
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're only using purl-spec test data, we should keep it inside tests/data/.

Suggested change
path = spec
path = tests/data/purl-spec/

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we should not keep the complete spec in tests

url = https://github.com/package-url/purl-spec.git
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
PYTHON_EXE?=python3
ACTIVATE?=. bin/activate;
VIRTUALENV_PYZ=thirdparty/virtualenv.pyz
BLACK_ARGS=--exclude=".cache|lib|bin|var" --line-length 100
BLACK_ARGS=src/ --exclude="\.cache|lib|bin|var" --line-length 100

virtualenv:
@echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}"
Expand All @@ -52,7 +52,7 @@ isort:

black:
@echo "-> Apply black code formatter"
@${ACTIVATE} black ${BLACK_ARGS} .
@${ACTIVATE} black ${BLACK_ARGS}

mypy:
@echo "-> Type check the Python code."
Expand Down
1 change: 1 addition & 0 deletions spec
Submodule spec added at a627e0
51 changes: 44 additions & 7 deletions src/packageurl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from collections.abc import Mapping
from typing import TYPE_CHECKING
from typing import Any
from typing import Optional
from typing import Union
from typing import overload
from urllib.parse import quote as _percent_quote
Expand Down Expand Up @@ -116,15 +117,18 @@ def normalize_namespace(

namespace_str = namespace if isinstance(namespace, str) else namespace.decode("utf-8")
namespace_str = namespace_str.strip().strip("/")
if ptype in ("bitbucket", "github", "pypi", "gitlab"):
if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
namespace_str = namespace_str.lower()
segments = [seg for seg in namespace_str.split("/") if seg.strip()]
segments_quoted = map(get_quoter(encode), segments)
return "/".join(segments_quoted) or None


def normalize_name(
name: AnyStr | None, ptype: str | None, encode: bool | None = True
name: AnyStr | None,
qualifiers: Union[Union[str, bytes], dict[str, str], None],
ptype: str | None,
encode: bool | None = True,
) -> str | None:
if not name:
return None
Expand All @@ -133,20 +137,38 @@ def normalize_name(
quoter = get_quoter(encode)
name_str = quoter(name_str)
name_str = name_str.strip().strip("/")
if ptype in ("bitbucket", "github", "pypi", "gitlab"):
if ptype and ptype in ("mlflow"):
# MLflow purl names are case-sensitive for Azure ML, it is case sensitive and must be kept as-is in the package URL
# For Databricks, it is case insensitive and must be lowercased in the package URL
if isinstance(qualifiers, dict):
repo_url = qualifiers.get("repository_url")
if repo_url and "azureml" in repo_url.lower():
return name_str
if repo_url and "databricks" in repo_url.lower():
return name_str.lower()
if isinstance(qualifiers, str):
if "azureml" in qualifiers.lower():
return name_str
if "databricks" in qualifiers.lower():
return name_str.lower()
Comment on lines +141 to +153
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should abstract this out into a separate function.

if ptype in ("bitbucket", "github", "pypi", "gitlab", "composer"):
name_str = name_str.lower()
if ptype == "pypi":
name_str = name_str.replace("_", "-")
return name_str or None


def normalize_version(version: AnyStr | None, encode: bool | None = True) -> str | None:
def normalize_version(
version: AnyStr | None, ptype: Optional[Union[str, bytes]], encode: bool | None = True
) -> str | None:
if not version:
return None

version_str = version if isinstance(version, str) else version.decode("utf-8")
quoter = get_quoter(encode)
version_str = quoter(version_str.strip())
if ptype and isinstance(ptype, str) and ptype in ("huggingface"):
return version_str.lower()
return version_str or None


Expand Down Expand Up @@ -304,8 +326,8 @@ def normalize(
"""
type_norm = normalize_type(type, encode)
namespace_norm = normalize_namespace(namespace, type_norm, encode)
name_norm = normalize_name(name, type_norm, encode)
version_norm = normalize_version(version, encode)
name_norm = normalize_name(name, qualifiers, type_norm, encode)
version_norm = normalize_version(version, type, encode)
qualifiers_norm = normalize_qualifiers(qualifiers, encode)
subpath_norm = normalize_subpath(subpath, encode)
return type_norm, namespace_norm, name_norm, version_norm, qualifiers_norm, subpath_norm
Expand Down Expand Up @@ -464,8 +486,21 @@ def from_string(cls, purl: str) -> Self:
if not type_ or not sep:
raise ValueError(f"purl is missing the required type component: {purl!r}.")

if not all(c in string.ascii_letters + string.digits + "-._" for c in type_):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if not all(c in string.ascii_letters + string.digits + "-._" for c in type_):
valid_chars = string.ascii_letters + string.digits + ".-_"
if not all(c in valid_chars for c in type_):

raise ValueError(
f"purl type must be composed only of ASCII letters and numbers, period, dash and underscore: {type_!r}."
)

if ":" in type_:
raise ValueError(f"purl type cannot contain a colon: {type_!r}.")
Comment on lines +494 to +495
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part is not reachable because : is not present in valid_chars


if type_[0] in string.digits:
raise ValueError(f"purl type cannot start with a number: {type_!r}.")

type_ = type_.lower()

original_remainder = remainder

scheme, authority, path, qualifiers_str, subpath = _urlsplit(
url=remainder, scheme="", allow_fragments=True
)
Expand All @@ -480,7 +515,9 @@ def from_string(cls, purl: str) -> Self:
path = authority + ":" + path

if scheme:
path = scheme + ":" + path
# This is a way to preserve the casing of the original scheme
original_scheme = original_remainder.split(":", 1)[0]
path = original_scheme + ":" + path

path = path.lstrip("/")

Expand Down
160 changes: 160 additions & 0 deletions tests/test_purl_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Copyright (c) the purl authors
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Visit https://github.com/package-url/packageurl-python for support and
# download.

import json
import os

import pytest

from packageurl import PackageURL

current_dir = os.path.dirname(__file__)
root_dir = os.path.abspath(os.path.join(current_dir, ".."))
spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json")

valid_purl_types_file = os.path.join(root_dir, "spec", "purl-types-index.json")


with open(spec_file_path, "r", encoding="utf-8") as f:
test_cases = json.load(f)

with open(valid_purl_types_file, "r", encoding="utf-8") as f:
valid_purl_types = json.load(f)
Comment on lines +42 to +43
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we using valid_purl_types any where in test?


tests = test_cases["tests"]

parse_tests = [t for t in tests if t["test_type"] == "parse"]
build_tests = [t for t in tests if t["test_type"] == "build"]


@pytest.mark.parametrize(
"description, input_str, expected_output, expected_failure",
[
(t["description"], t["input"], t["expected_output"], t["expected_failure"])
for t in parse_tests
],
)
def test_parse(description, input_str, expected_output, expected_failure):
if expected_failure:
with pytest.raises(Exception):
PackageURL.from_string(input_str)
# assert None ==PackageURL.from_string(input_str)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# assert None ==PackageURL.from_string(input_str)

else:
result = PackageURL.from_string(input_str)
assert result.to_string() == expected_output


@pytest.mark.parametrize(
"description, input_dict, expected_output, expected_failure",
[
(t["description"], t["input"], t["expected_output"], t["expected_failure"])
for t in build_tests
],
)
def test_build(description, input_dict, expected_output, expected_failure):
kwargs = {
"type": input_dict.get("type"),
"namespace": input_dict.get("namespace"),
"name": input_dict.get("name"),
"version": input_dict.get("version"),
"qualifiers": input_dict.get("qualifiers"),
"subpath": input_dict.get("subpath"),
}

if expected_failure:
with pytest.raises(Exception):
PackageURL(**kwargs).to_string()
else:
purl = PackageURL(**kwargs)
assert purl.to_string() == expected_output


def load_spec_files(spec_dir):
"""
Load all JSON files from the given directory into a dictionary.
Key = filename, Value = parsed JSON content
"""
spec_data = {}
for filename in os.listdir(spec_dir):
if filename.endswith("-test.json"):
filepath = os.path.join(spec_dir, filename)
with open(filepath, "r", encoding="utf-8") as f:
try:
data = json.load(f)
spec_data[filename] = data["tests"]
except json.JSONDecodeError as e:
print(f"Error parsing {filename}: {e}")
return spec_data


SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
spec_dict = load_spec_files(SPEC_DIR)

flattened_cases = []
for filename, cases in spec_dict.items():
for case in cases:
flattened_cases.append((filename, case["description"], case))
Comment on lines +111 to +117
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move this to the top.



@pytest.mark.parametrize("filename,description,test_case", flattened_cases)
def test_package_type_case(filename, description, test_case):
test_type = test_case["test_type"]
expected_failure = test_case.get("expected_failure", False)

if expected_failure:
with pytest.raises(Exception):
run_test_case(test_case, test_type, description)
else:
run_test_case(test_case, test_type, description)


def run_test_case(case, test_type, desc):
if test_type == "parse":
purl = PackageURL.from_string(case["input"])
expected = case["expected_output"]
assert purl.type == expected["type"]
assert purl.namespace == expected["namespace"]
assert purl.name == expected["name"]
assert purl.version == expected["version"]
if expected["qualifiers"]:
assert purl.qualifiers == expected["qualifiers"]
else:
assert not purl.qualifiers
assert purl.subpath == expected["subpath"]

elif test_type == "roundtrip":
purl = PackageURL.from_string(case["input"])
assert purl.to_string() == case["expected_output"]

elif test_type == "build":
input_data = case["input"]
purl = PackageURL(
type=input_data["type"],
namespace=input_data["namespace"],
name=input_data["name"],
version=input_data["version"],
qualifiers=input_data.get("qualifiers"),
subpath=input_data.get("subpath"),
)
assert purl.to_string() == case["expected_output"]
Loading