From 6015d6b36e5f07100832ec286ac8ba37f4ce6e59 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:16:23 +0000 Subject: [PATCH 01/13] Add python 3.13 support --- .github/workflows/main.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8a87e6d8..1490aac6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ] max-parallel: 4 steps: diff --git a/pyproject.toml b/pyproject.toml index d16c7b38..78363ff6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3 :: Only", ] From d7785a14cd8114a0ef7249dfb53405211213ae45 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:21:38 +0000 Subject: [PATCH 02/13] Fix unittests during GHA --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1490aac6..5e6e9d7f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,4 +37,4 @@ jobs: flake8 medcat2 - name: Test run: | - timeout 10m python -m unittest medcat2 + timeout 10m python -m unittest discover From 039522ec870e7d7759df59ba16fd6d751f950f33 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:24:53 +0000 Subject: [PATCH 03/13] Add missing schema file --- tests/resources/mct2_cdb/.schema.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/resources/mct2_cdb/.schema.json diff --git a/tests/resources/mct2_cdb/.schema.json b/tests/resources/mct2_cdb/.schema.json new file mode 100644 index 
00000000..eb026437 --- /dev/null +++ b/tests/resources/mct2_cdb/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.cdb.cdb.CDB", "init-parts": ["config"]} \ No newline at end of file From 22f709a0252dc68473cb9e843aa47ef20a7c53c2 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:37:19 +0000 Subject: [PATCH 04/13] Add all missing schema files for test time resources --- tests/resources/mct2_cdb/config/.schema.json | 1 + tests/resources/mct2_cdb/config/annotation_output/.schema.json | 1 + tests/resources/mct2_cdb/config/cdb_maker/.schema.json | 1 + tests/resources/mct2_cdb/config/components/.schema.json | 1 + tests/resources/mct2_cdb/config/components/linking/.schema.json | 1 + .../mct2_cdb/config/components/linking/filters/.schema.json | 1 + tests/resources/mct2_cdb/config/components/ner/.schema.json | 1 + tests/resources/mct2_cdb/config/components/tagging/.schema.json | 1 + .../mct2_cdb/config/components/token_normalizing/.schema.json | 1 + tests/resources/mct2_cdb/config/general/.schema.json | 1 + tests/resources/mct2_cdb/config/general/nlp/.schema.json | 1 + tests/resources/mct2_cdb/config/preprocessing/.schema.json | 1 + 12 files changed, 12 insertions(+) create mode 100644 tests/resources/mct2_cdb/config/.schema.json create mode 100644 tests/resources/mct2_cdb/config/annotation_output/.schema.json create mode 100644 tests/resources/mct2_cdb/config/cdb_maker/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/linking/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/linking/filters/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/ner/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/tagging/.schema.json create mode 100644 tests/resources/mct2_cdb/config/components/token_normalizing/.schema.json create mode 100644 tests/resources/mct2_cdb/config/general/.schema.json create 
mode 100644 tests/resources/mct2_cdb/config/general/nlp/.schema.json create mode 100644 tests/resources/mct2_cdb/config/preprocessing/.schema.json diff --git a/tests/resources/mct2_cdb/config/.schema.json b/tests/resources/mct2_cdb/config/.schema.json new file mode 100644 index 00000000..f3ab3400 --- /dev/null +++ b/tests/resources/mct2_cdb/config/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.Config", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/annotation_output/.schema.json b/tests/resources/mct2_cdb/config/annotation_output/.schema.json new file mode 100644 index 00000000..952b569d --- /dev/null +++ b/tests/resources/mct2_cdb/config/annotation_output/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.AnnotationOutput", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/cdb_maker/.schema.json b/tests/resources/mct2_cdb/config/cdb_maker/.schema.json new file mode 100644 index 00000000..f0335734 --- /dev/null +++ b/tests/resources/mct2_cdb/config/cdb_maker/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.CDBMaker", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/components/.schema.json b/tests/resources/mct2_cdb/config/components/.schema.json new file mode 100644 index 00000000..a0406f2c --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.Components", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/components/linking/.schema.json b/tests/resources/mct2_cdb/config/components/linking/.schema.json new file mode 100644 index 00000000..948cc350 --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/linking/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.Linking", "init-parts": []} \ No newline at end of file diff --git 
a/tests/resources/mct2_cdb/config/components/linking/filters/.schema.json b/tests/resources/mct2_cdb/config/components/linking/filters/.schema.json new file mode 100644 index 00000000..545a901a --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/linking/filters/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.LinkingFilters", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/components/ner/.schema.json b/tests/resources/mct2_cdb/config/components/ner/.schema.json new file mode 100644 index 00000000..769c365f --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/ner/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.Ner", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/components/tagging/.schema.json b/tests/resources/mct2_cdb/config/components/tagging/.schema.json new file mode 100644 index 00000000..cb276470 --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/tagging/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.CoreComponentConfig", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/components/token_normalizing/.schema.json b/tests/resources/mct2_cdb/config/components/token_normalizing/.schema.json new file mode 100644 index 00000000..cb276470 --- /dev/null +++ b/tests/resources/mct2_cdb/config/components/token_normalizing/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.CoreComponentConfig", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/general/.schema.json b/tests/resources/mct2_cdb/config/general/.schema.json new file mode 100644 index 00000000..907d1f01 --- /dev/null +++ b/tests/resources/mct2_cdb/config/general/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.General", "init-parts": []} \ No newline at end of file diff --git 
a/tests/resources/mct2_cdb/config/general/nlp/.schema.json b/tests/resources/mct2_cdb/config/general/nlp/.schema.json new file mode 100644 index 00000000..b48441ab --- /dev/null +++ b/tests/resources/mct2_cdb/config/general/nlp/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.NLPConfig", "init-parts": []} \ No newline at end of file diff --git a/tests/resources/mct2_cdb/config/preprocessing/.schema.json b/tests/resources/mct2_cdb/config/preprocessing/.schema.json new file mode 100644 index 00000000..ff968041 --- /dev/null +++ b/tests/resources/mct2_cdb/config/preprocessing/.schema.json @@ -0,0 +1 @@ +{"serialised-class": "medcat2.config.config.Preprocessing", "init-parts": []} \ No newline at end of file From ecb1d7235fe17421b539ad2b3592bdb693821596 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:43:11 +0000 Subject: [PATCH 05/13] Fix pipe-union usage in tests --- tests/stats/test_stats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/stats/test_stats.py b/tests/stats/test_stats.py index 49a990ec..a3e19737 100644 --- a/tests/stats/test_stats.py +++ b/tests/stats/test_stats.py @@ -1,3 +1,4 @@ +from typing import Union import os import json @@ -41,7 +42,7 @@ def test_check_export_is_valid(self): detexted_value = text[start:end] self.assertEqual(detexted_value, value) - def assert_perfect_dict(self, d: dict[str, float | int]) -> None: + def assert_perfect_dict(self, d: dict[str, Union[float, int]]) -> None: for cui, f1 in d.items(): with self.subTest(cui): self.assertEqual(f1, 1) From 85cffe1a059c7f82772be53ca43ef15312568faa Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 2 Dec 2024 15:48:34 +0000 Subject: [PATCH 06/13] Allow all 5 to run in parallel --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5e6e9d7f..f4666214 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ 
-13,7 +13,7 @@ jobs: strategy: matrix: python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ] - max-parallel: 4 + max-parallel: 5 steps: - uses: actions/checkout@v4 From db7d7d2d4fdf43981badcaa182984d5f47a01d50 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 12:12:40 +0100 Subject: [PATCH 07/13] Add debug output to uv sync --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index edabd91b..08f8be53 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,7 +23,7 @@ jobs: run: uv python install ${{ matrix.python-version }} - name: Install the project run: | - uv sync --all-extras --dev + uv --verbose sync --all-extras --dev uv run python -m ensurepip uv run python -m pip install --upgrade pip - name: Check types From 4043576cdea6791cbb48c1c9156dae44a3ecd22a Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 12:29:02 +0100 Subject: [PATCH 08/13] Bump supported ruff version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 704190fd..6b52043c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ dependencies = [ # Optional # projects. 
[project.optional-dependencies] # Optional dev = [ - "ruff~=0.1.7", + "ruff~=0.11.10", "mypy", "types-tqdm", "types-setuptools", From 714784c3f8dc585305029569860489b726f43237 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 13:54:12 +0100 Subject: [PATCH 09/13] Add pip install dryrun for debug --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 08f8be53..c9bde2dc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,6 +23,7 @@ jobs: run: uv python install ${{ matrix.python-version }} - name: Install the project run: | + uv pip install --dry-run ".[spacy,deid,meta-cat]" uv --verbose sync --all-extras --dev uv run python -m ensurepip uv run python -m pip install --upgrade pip From 6a17064c91a2344e81d5895370ab8dd322d195dc Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 13:56:37 +0100 Subject: [PATCH 10/13] Add explicit venv creation --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c9bde2dc..419e980f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -23,6 +23,7 @@ jobs: run: uv python install ${{ matrix.python-version }} - name: Install the project run: | + uv venv uv pip install --dry-run ".[spacy,deid,meta-cat]" uv --verbose sync --all-extras --dev uv run python -m ensurepip From eb7b6a7c25148e62a0a7bb7ee9a369cd3f8afad8 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 13:59:59 +0100 Subject: [PATCH 11/13] DEBUG: Add PINNED dependency for scipy --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6b52043c..04333099 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ dev = [ "types-tqdm", "types-setuptools", "types-PyYAML", + "scipy==1.15.3", ] spacy = [ "spacy", From 1b8ee8b3f444521c57a843e7b26f85e53eddceee Mon Sep 17 00:00:00 2001 
From: mart-r Date: Mon, 19 May 2025 14:07:54 +0100 Subject: [PATCH 12/13] Specify scipy version for python 3.13 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 04333099..7663516a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ dev = [ "types-tqdm", "types-setuptools", "types-PyYAML", - "scipy==1.15.3", + "scipy~=1.15.0; python_version == '3.13'", ] spacy = [ "spacy", From 06954877001c75bc0369d09a5a279cc8ad40d724 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 19 May 2025 14:23:55 +0100 Subject: [PATCH 13/13] Explicitly specify spacy version for python 3.10+ --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7663516a..dcd381f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ dev = [ "scipy~=1.15.0; python_version == '3.13'", ] spacy = [ - "spacy", + "spacy>=3.8.4; python_version >= '3.10'", "spacy<3.8.4; python_version == '3.9'" ] meta_cat = [