7 changes: 4 additions & 3 deletions tests/python_tests/conftest.py
@@ -2,6 +2,7 @@
 import pytest
 import shutil
 import logging
+from pathlib import Path
 from utils.constants import get_ov_cache_models_dir
 
 # Configure logging
@@ -13,15 +14,15 @@
 def setup_and_teardown():
     """Fixture to set up and tear down the temporary directories."""
 
-    ov_cache_models_dir = get_ov_cache_models_dir()
+    ov_cache_models_dir = Path(get_ov_cache_models_dir())
 
     logger.info(f"Creating directory: {ov_cache_models_dir}")
-    os.makedirs(ov_cache_models_dir, exist_ok=True)
+    ov_cache_models_dir.mkdir(exist_ok=True, parents=True)
 
     yield
 
     if os.environ.get("CLEANUP_CACHE", "false").lower() != "false":
-        if os.path.exists(ov_cache_models_dir):
+        if ov_cache_models_dir.exists():
             logger.info(f"Removing temporary directory: {ov_cache_models_dir}")
             shutil.rmtree(ov_cache_models_dir)
         else:
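Note: the fixture above now does all directory handling through pathlib. A minimal sketch of the equivalence, under an assumed cache location (the path below is illustrative, not the real get_ov_cache_models_dir() value):

```python
from pathlib import Path
import shutil

cache_dir = Path("/tmp/ov_cache/models")      # hypothetical cache location
cache_dir.mkdir(parents=True, exist_ok=True)  # same behavior as os.makedirs(..., exist_ok=True)
assert cache_dir.exists()                     # replaces os.path.exists(...)
shutil.rmtree(cache_dir)                      # shutil.rmtree accepts Path objects directly
```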
30 changes: 16 additions & 14 deletions tests/python_tests/data/test_dataset.py
@@ -2,19 +2,21 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from openvino_genai import GenerationConfig
-from utils.generation_config import get_greedy, get_beam_search, get_multinomial_temperature
+from utils.generation_config import get_greedy, get_beam_search
 
+PROMPTS = [
+    "What is OpenVINO?",
+    "How are you?",
+    "What is your name?",
+    "Tell me something about Canada"
+]
+
+GENERATION_CONFIGS = [
+    get_greedy(),
+    get_beam_search(),
+    get_greedy(),
+    get_beam_search(),
+]
+
 def get_test_dataset() -> tuple[list[str], list[GenerationConfig]]:
-    prompts = [
-        "What is OpenVINO?",
-        "How are you?",
-        "What is your name?",
-        "Tell me something about Canada"
-    ]
-    generation_configs = [
-        get_greedy(),
-        get_beam_search(),
-        get_greedy(),
-        get_beam_search(),
-    ]
-    return (prompts, generation_configs)
+    return PROMPTS, GENERATION_CONFIGS
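Note: hoisting the data into module-level constants lets other tests import PROMPTS and GENERATION_CONFIGS directly while get_test_dataset() keeps its original signature. A usage sketch (the import path is an assumption based on the file location):

```python
# Assumes tests run with tests/python_tests on sys.path, so the module
# resolves as data.test_dataset; adjust the import if the rootdir differs.
from data.test_dataset import get_test_dataset, PROMPTS

prompts, generation_configs = get_test_dataset()
assert prompts is PROMPTS  # the function now returns the shared constants
```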
42 changes: 24 additions & 18 deletions tests/python_tests/samples/conftest.py
@@ -7,6 +7,7 @@
 import gc
 import requests
 from pathlib import Path
+from huggingface_hub import hf_hub_download
 
 from utils.network import retry_request
 from utils.constants import get_ov_cache_dir
@@ -162,10 +163,16 @@
     "cmu_us_awb_arctic-wav-arctic_a0001.bin": "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/cmu_us_awb_arctic-wav-arctic_a0001.bin"
 }
 
-SAMPLES_PY_DIR = Path(os.environ.get("SAMPLES_PY_DIR", os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../samples/python"))))
-SAMPLES_CPP_DIR = Path(os.environ.get("SAMPLES_CPP_DIR", os.getcwd()))
-SAMPLES_C_DIR = os.environ.get("SAMPLES_C_DIR", os.getcwd())
-SAMPLES_JS_DIR = os.environ.get("SAMPLES_JS_DIR", os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../samples/js")))
+SAMPLES_PY_DIR = Path(
+    os.environ.get("SAMPLES_PY_DIR")
+    or Path(__file__).parent.joinpath("../../../samples/python").resolve()
+)
+SAMPLES_CPP_DIR = Path(os.environ.get("SAMPLES_CPP_DIR") or Path.cwd())
+SAMPLES_C_DIR = Path(os.environ.get("SAMPLES_C_DIR") or Path.cwd())
+SAMPLES_JS_DIR = Path(
+    os.environ.get("SAMPLES_JS_DIR")
+    or Path(__file__).parent.joinpath("../../../samples/js").resolve()
+)
 
 @pytest.fixture(scope="session", autouse=True)
 def setup_and_teardown(request, tmp_path_factory):
@@ -194,17 +201,15 @@ def setup_and_teardown(request, tmp_path_factory):
 
 
 def download_gguf_model(model, model_path):
-    """Download the GGUF model using huggingface-cli."""
-    sub_env = os.environ.copy()
+    """Download the GGUF model using hf_hub_download."""
     model_name = model["name"]
     model_gguf_filename = model["gguf_filename"]
-    command = ["huggingface-cli", "download", model_name, model_gguf_filename, "--local-dir", model_path]
-    logger.info(f"Downloading command: {' '.join(command)}")
-    try:
-        retry_request(lambda: subprocess.run(command, check=True, text=True, env=sub_env, stderr=subprocess.STDOUT, stdout=subprocess.PIPE))
-    except subprocess.CalledProcessError as error:
-        logger.error(f"huggingface-cli returned {error.returncode}. Output:\n{error.output}")
-        raise
+    logger.info(f"Downloading {model_name}/{model_gguf_filename} to {model_path}")
+    retry_request(lambda: hf_hub_download(
+        repo_id=model_name,
+        filename=model_gguf_filename,
+        local_dir=model_path
+    ))
 
 def optimum_cli_convert(model, model_path):
     """Convert the model using optimum-cli."""
@@ -233,18 +238,19 @@ def convert_model(request):
     model = MODELS[model_id]
     model_name = model["name"]
     model_cache = os.path.join(models_cache, model_id)
-    model_path = os.path.join(model_cache, model_name)
     logger.info(f"Preparing model: {model_name}")
-    if not os.path.exists(model_path):
+    if not os.path.exists(model_cache):
         if "gguf_filename" in model:
             # Download the GGUF model if not already downloaded
-            download_gguf_model(model, model_path)
+            download_gguf_model(model, model_cache)
         else:
             # Convert the model if not already converted
-            optimum_cli_convert(model, model_path)
+            optimum_cli_convert(model, model_cache)
 
     if "gguf_filename" in model:
-        model_path = os.path.join(model_path, model["gguf_filename"])
+        model_path = os.path.join(model_cache, model["gguf_filename"])
+    else:
+        model_path = model_cache
     yield model_path
 
     # Cleanup the model after tests
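Note: hf_hub_download fetches a single file into local_dir and returns the local file path, which is why convert_model above can rebuild the path as os.path.join(model_cache, model["gguf_filename"]). A minimal sketch with illustrative repo and file names (not ones used by these tests):

```python
from huggingface_hub import hf_hub_download

# repo_id and filename below are hypothetical placeholders.
gguf_path = hf_hub_download(
    repo_id="some-org/some-model-GGUF",
    filename="model-q4_0.gguf",
    local_dir="/tmp/model_cache",
)
print(gguf_path)  # absolute path of the downloaded file inside local_dir
```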
12 changes: 6 additions & 6 deletions tests/python_tests/samples/test_beam_search_causal_lm.py
@@ -24,17 +24,17 @@ def test_sample_beam_search_causal_lm(self, convert_model, sample_args):
         if sys.platform == 'darwin':
             pytest.xfail("Ticket 173586")
         # C++ test
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'beam_search_causal_lm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'beam_search_causal_lm').as_posix()
         cpp_command = [cpp_sample, convert_model, f'"{sample_args}"']
         cpp_result = run_sample(cpp_command)
 
         # Python test
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/beam_search_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/beam_search_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, f'"{sample_args}"']
         py_result = run_sample(py_command)
 
         # Test JS sample
-        js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/beam_search_causal_lm.js")
+        js_sample = (SAMPLES_JS_DIR / "text_generation/beam_search_causal_lm.js").as_posix()
         js_command = ['node', js_sample, convert_model, f'"{sample_args}"']
         js_result = run_sample(js_command)
 
@@ -64,19 +64,19 @@ def test_sample_beam_search_causal_lm_refs(self, request, convert_model, sample_
         if sys.platform == 'darwin':
             pytest.xfail("Ticket 173586")
         # C++ test
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'beam_search_causal_lm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'beam_search_causal_lm').as_posix()
         cpp_command = [cpp_sample, convert_model] + [f'"{arg}"' for arg in sample_args]
         cpp_result = run_sample(cpp_command)
         cpp_predictions = cpp_result.stdout
 
         # Python test
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/beam_search_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/beam_search_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model] + [f'"{arg}"' for arg in sample_args]
         py_result = run_sample(py_command)
         py_predictions = py_result.stdout
 
         # Test JS sample
-        js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/beam_search_causal_lm.js")
+        js_sample = (SAMPLES_JS_DIR / "text_generation/beam_search_causal_lm.js").as_posix()
         js_command = ['node', js_sample, convert_model] + [f'"{arg}"' for arg in sample_args]
         js_result = run_sample(js_command)
         js_predictions = js_result.stdout
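Note: the recurring (DIR / 'name').as_posix() pattern converts a Path into a plain string with forward slashes, keeping the subprocess argument lists uniform across platforms. A self-contained sketch using PureWindowsPath so the effect is visible on any OS (the checkout path is hypothetical):

```python
from pathlib import PureWindowsPath

samples_dir = PureWindowsPath(r"C:\work\openvino.genai\samples\python")  # hypothetical checkout
script = (samples_dir / "text_generation/beam_search_causal_lm.py").as_posix()
print(script)
# C:/work/openvino.genai/samples/python/text_generation/beam_search_causal_lm.py
```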
11 changes: 7 additions & 4 deletions tests/python_tests/samples/test_benchmark_genai.py
@@ -8,6 +8,7 @@
 from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR, SAMPLES_C_DIR
 from test_utils import run_sample
 
+
 class TestBenchmarkGenAI:
     @pytest.mark.llm
     @pytest.mark.samples
@@ -22,7 +23,7 @@ def test_py_sample_benchmark_genai(self, convert_model, prompt, sample_args):
         if sys.platform == 'darwin':
             pytest.xfail("Ticket 173586")
         # Test Python sample
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/benchmark_genai.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/benchmark_genai.py").resolve()
         py_command = [sys.executable, py_script, '-m', convert_model, '-p', f'"{prompt}"'] + sample_args
         run_sample(py_command)
 
@@ -36,8 +37,10 @@ def test_py_sample_benchmark_genai(self, convert_model, prompt, sample_args):
         indirect=["convert_model"],
     )
     def test_cpp_sample_benchmark_genai(self, convert_model, prompt, sample_args):
+        if sys.platform == "darwin":
+            pytest.xfail("CPP sample exits with code 1 on macs.")
         # Test CPP sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'benchmark_genai')
+        cpp_sample = (SAMPLES_CPP_DIR / 'benchmark_genai').resolve()
         cpp_command = [cpp_sample, '-m', convert_model, '-p', f'"{prompt}"'] + sample_args
         run_sample(cpp_command)
 
@@ -50,8 +53,8 @@ def test_cpp_sample_benchmark_genai(self, convert_model, prompt, sample_args):
         ],
         indirect=["convert_model"],
     )
-    def test_cpp_sample_benchmark_genai(self, convert_model, prompt, sample_args):
+    def test_c_sample_benchmark_genai(self, convert_model, prompt, sample_args):
         # Test C sample
-        c_sample = os.path.join(SAMPLES_C_DIR, 'benchmark_genai_c')
+        c_sample = (SAMPLES_C_DIR / 'benchmark_genai_c').resolve()
         c_command = [c_sample, '-m', convert_model, '-p', f'"{prompt}"'] + sample_args
         run_sample(c_command)
4 changes: 2 additions & 2 deletions tests/python_tests/samples/test_benchmark_vlm.py
@@ -21,11 +21,11 @@ class TestBenchmarkVLM:
     def test_sample_benchmark_vlm(self, convert_model, download_test_content):
         num_iter = "3"
         # Run C++ benchmark sample
-        benchmark_sample = os.path.join(SAMPLES_CPP_DIR, 'benchmark_vlm')
+        benchmark_sample = (SAMPLES_CPP_DIR / 'benchmark_vlm').as_posix()
         benchmark_cpp_command = [benchmark_sample, "-m", convert_model, "-i", download_test_content, "-n", num_iter]
         run_sample(benchmark_cpp_command)
 
         # Run Python benchmark sample
-        benchmark_script = os.path.join(SAMPLES_PY_DIR, 'visual_language_chat/benchmark_vlm.py')
+        benchmark_script = (SAMPLES_PY_DIR / 'visual_language_chat/benchmark_vlm.py').as_posix()
         benchmark_py_command = [sys.executable, benchmark_script, "-m", convert_model, "-i", download_test_content, "-n", num_iter]
         run_sample(benchmark_py_command)
6 changes: 3 additions & 3 deletions tests/python_tests/samples/test_chat_sample.py
@@ -21,19 +21,19 @@ def test_chat_sample_refs(self, request, convert_model, prompts):
         if sys.platform == 'darwin':
             pytest.xfail("Ticket 173586")
         # C++ test
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'chat_sample')
+        cpp_sample = (SAMPLES_CPP_DIR / 'chat_sample').as_posix()
         cpp_command = [cpp_sample, convert_model]
         cpp_result = run_sample(cpp_command, '\n'.join(prompts))
         cpp_predictions = cpp_result.stdout
 
         # Python test
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/chat_sample.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/chat_sample.py").as_posix()
         py_command = [sys.executable, py_script, convert_model]
         py_result = run_sample(py_command, '\n'.join(prompts))
         py_predictions = py_result.stdout
 
         # C test
-        c_sample = os.path.join(SAMPLES_C_DIR, 'chat_sample_c')
+        c_sample = (SAMPLES_C_DIR / 'chat_sample_c').as_posix()
         c_command = [c_sample, convert_model]
         c_result = run_sample(c_command, '\n'.join(prompts))
         c_predictions = c_result.stdout
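Note: the second positional argument to run_sample feeds the chat sample's stdin, one prompt per line. The helper lives in test_utils and is not part of this diff; a hedged sketch of what it plausibly wraps:

```python
import subprocess

# Assumption: run_sample(command, stdin_text=None, env=None) is roughly
# a thin wrapper over subprocess.run, as sketched here.
def run_sample(command, stdin_text=None, env=None):
    return subprocess.run(
        command,
        input=stdin_text,  # e.g. '\n'.join(prompts) for the chat samples
        capture_output=True,
        text=True,         # .stdout becomes str, as the *_predictions usage expects
        check=True,
        env=env,
    )
```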
4 changes: 2 additions & 2 deletions tests/python_tests/samples/test_continuous_batching_tools.py
@@ -18,7 +18,7 @@ class TestContinuousBatching:
     )
     def test_cpp_tool_accuracy(self, convert_model, sample_args):
         # Test CPP sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'continuous_batching_accuracy')
+        cpp_sample = (SAMPLES_CPP_DIR / 'continuous_batching_accuracy').as_posix()
         cpp_command = [cpp_sample, '-m', convert_model] + sample_args
         run_sample(cpp_command)
 
@@ -28,7 +28,7 @@ def test_cpp_tool_accuracy(self, convert_model, sample_args):
     @pytest.mark.parametrize("sample_args", [["-n", "10", "--cache_size", "1"], ["-n", "10", "--dynamic_split_fuse", "--max_batch_size", "256", "--max_input_len", "256", "--cache_size", "1"]])
     def test_cpp_tool_benchmark(self, convert_model, download_test_content, sample_args):
         # Test CPP sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'continuous_batching_benchmark')
+        cpp_sample = (SAMPLES_CPP_DIR / 'continuous_batching_benchmark').as_posix()
         cpp_command = [cpp_sample, '-m', convert_model, '--dataset', download_test_content] + sample_args
         run_sample(cpp_command)
4 changes: 2 additions & 2 deletions tests/python_tests/samples/test_encrypted_model_causal_lm.py
@@ -16,12 +16,12 @@ class TestEncryptedLM:
 
     def test_sample_encrypted_lm(self, convert_model, prompt):
         # Test CPP sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'encrypted_model_causal_lm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'encrypted_model_causal_lm').as_posix()
         cpp_command = [cpp_sample, convert_model, prompt]
         cpp_result = run_sample(cpp_command)
 
         # Test Python sample
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/encrypted_model_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/encrypted_model_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, prompt]
         py_result = run_sample(py_command)
6 changes: 3 additions & 3 deletions tests/python_tests/samples/test_encrypted_model_vlm.py
@@ -19,17 +19,17 @@ def test_sample_encrypted_lm(self, convert_model, download_test_content, generat
         env = os.environ.copy()
         env["OPENVINO_LOG_LEVEL"] = "0"
         # Test CPP sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'encrypted_model_vlm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'encrypted_model_vlm').as_posix()
         cpp_command = [cpp_sample, convert_model, os.path.dirname(generate_test_content), sample_args]
         cpp_result = run_sample(cpp_command, env=env)
 
         # Test Python sample
-        py_script = os.path.join(SAMPLES_PY_DIR, "visual_language_chat/encrypted_model_vlm.py")
+        py_script = (SAMPLES_PY_DIR / "visual_language_chat/encrypted_model_vlm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, os.path.dirname(generate_test_content), sample_args]
         py_result = run_sample(py_command, env=env)
 
         # Test common sample
-        py_common_script = os.path.join(SAMPLES_PY_DIR, "visual_language_chat/visual_language_chat.py")
+        py_common_script = (SAMPLES_PY_DIR / "visual_language_chat/visual_language_chat.py").as_posix()
         py_common_command = [sys.executable, py_common_script, convert_model, os.path.dirname(generate_test_content)]
         py_common_result = run_sample(py_common_command, sample_args, env)
8 changes: 4 additions & 4 deletions tests/python_tests/samples/test_greedy_causal_lm.py
@@ -29,24 +29,24 @@ def test_sample_greedy_causal_lm(self, request, convert_model, sample_args):
         prompt = sample_args
 
         # C++ test
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'greedy_causal_lm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'greedy_causal_lm').as_posix()
         cpp_command = [cpp_sample, convert_model, prompt]
         cpp_result = run_sample(cpp_command)
         cpp_predictions = cpp_result.stdout
 
         # Python test
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/greedy_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/greedy_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, prompt]
         py_result = run_sample(py_command)
         py_predictions = py_result.stdout
 
         # Test C sample
-        c_sample = os.path.join(SAMPLES_C_DIR, "greedy_causal_lm_c")
+        c_sample = (SAMPLES_C_DIR / "greedy_causal_lm_c").as_posix()
         c_command = [c_sample, convert_model, sample_args]
         c_result = run_sample(c_command)
 
         # Test JS sample
-        js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/greedy_causal_lm.js")
+        js_sample = (SAMPLES_JS_DIR / "text_generation/greedy_causal_lm.js").as_posix()
         js_command = ['node', js_sample, convert_model, sample_args]
         js_result = run_sample(js_command)
4 changes: 2 additions & 2 deletions tests/python_tests/samples/test_inpainting.py
@@ -29,11 +29,11 @@ class TestInpainting:
     )
     def test_sample_inpainting(self, download_model, prompt, download_test_content, download_mask_image):
         # Run Python sample
-        py_script = os.path.join(SAMPLES_PY_DIR, "image_generation/inpainting.py")
+        py_script = (SAMPLES_PY_DIR / "image_generation/inpainting.py").as_posix()
         py_command = [sys.executable, py_script, download_model, "'" + prompt + "'", download_test_content, download_mask_image]
         run_sample(py_command)
 
         # Run C++ sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'inpainting')
+        cpp_sample = (SAMPLES_CPP_DIR / 'inpainting').as_posix()
         cpp_command = [cpp_sample, download_model, "'" + prompt + "'", download_test_content, download_mask_image]
         run_sample(cpp_command)
2 changes: 1 addition & 1 deletion tests/python_tests/samples/test_lora.py
@@ -15,6 +15,6 @@ class TestLora:
     @pytest.mark.parametrize("sample_args", ["How to create a table with two columns, one of them has type float, another one has type int?"])
     @pytest.mark.parametrize("download_test_content", ["adapter_model.safetensors"], indirect=True)
     def test_python_sample_lora(self, convert_model, download_test_content, sample_args):
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/lora_greedy_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/lora_greedy_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, download_test_content, sample_args]
         run_sample(py_command)
6 changes: 3 additions & 3 deletions tests/python_tests/samples/test_multinomial_causal_lm.py
@@ -25,18 +25,18 @@ class TestMultinomialCausalLM:
     )
     def test_sample_multinomial_causal_lm(self, convert_model, sample_args):
         # Run C++ sample
-        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'multinomial_causal_lm')
+        cpp_sample = (SAMPLES_CPP_DIR / 'multinomial_causal_lm').as_posix()
         cpp_command = [cpp_sample, convert_model, sample_args]
         cpp_result = run_sample(cpp_command)
 
         # Run Python sample
-        py_script = os.path.join(SAMPLES_PY_DIR, "text_generation/multinomial_causal_lm.py")
+        py_script = (SAMPLES_PY_DIR / "text_generation/multinomial_causal_lm.py").as_posix()
         py_command = [sys.executable, py_script, convert_model, sample_args]
         py_result = run_sample(py_command)
 
 
         # Test JS sample
-        js_sample = os.path.join(SAMPLES_JS_DIR, "text_generation/multinomial_causal_lm.js")
+        js_sample = (SAMPLES_JS_DIR / "text_generation/multinomial_causal_lm.js").as_posix()
         js_command = ['node', js_sample, convert_model, sample_args]
         js_result = run_sample(js_command)