diff --git a/.gitignore b/.gitignore
index 11c01a1..b83e6b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -123,3 +123,9 @@ ENV/
 
 # macOS cache
 **/.DS_Store
+
+# Poetry
+poetry.lock
+
+# Claude
+.claude/*
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ceb5527
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,111 @@
+[tool.poetry]
+name = "recommender-systems-evaluation"
+version = "0.1.0"
+description = "A comprehensive evaluation framework for deep learning-based recommender systems"
+authors = ["Your Name "]
+readme = "README.md"
+license = "LICENSE"
+packages = [{include = "*", from = "."}]
+
+[tool.poetry.dependencies]
+python = ">=3.7,<3.12"
+# Core dependencies - versions updated for compatibility
+numpy = ">=1.16.2"
+pandas = ">=0.24.2"
+scipy = ">=1.2.1"
+scikit-learn = ">=0.20.3"
+matplotlib = ">=3.0.3"
+seaborn = ">=0.9.0"
+tqdm = ">=4.31.1"
+h5py = ">=2.9.0"
+# Note: TensorFlow 1.x and related packages would need to be installed separately
+# as they have specific version requirements and compatibility issues
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^7.4.0"
+pytest-cov = "^4.1.0"
+pytest-mock = "^3.11.0"
+
+[tool.poetry.scripts]
+test = "pytest:main"
+tests = "pytest:main"
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py", "tests.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "--strict-markers",
+    "--tb=short",
+    "--cov=.",
+    "--cov-branch",
+    "--cov-report=term-missing:skip-covered",
+    "--cov-report=html",
+    "--cov-report=xml",
+    "--cov-fail-under=80",
+    "-v"
+]
+markers = [
+    "unit: marks tests as unit tests (fast, isolated)",
+    "integration: marks tests as integration tests (may require external resources)",
+    "slow: marks tests as slow running"
+]
+filterwarnings = [
+    "ignore::DeprecationWarning",
+    "ignore::PendingDeprecationWarning"
+]
+
+[tool.coverage.run]
+source = ["."]
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*_test.py",
+    "*/CythonCompiler/*",
+    "*/setup.py",
+    "*/conf.py",
+    "*/__pycache__/*",
+    "*/venv/*",
+    "*/virtualenv/*",
+    "*/.venv/*",
+    "*/site-packages/*"
+]
+
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = false
+fail_under = 80
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "if TYPE_CHECKING:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if 0:",
+    "class .*\\bProtocol\\):",
+    "@(abc\\.)?abstractmethod"
+]
+
+[tool.coverage.html]
+directory = "htmlcov"
+
+[tool.coverage.xml]
+output = "coverage.xml"
+
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+# Note: The original requirements.txt contains legacy dependencies that may need special handling:
+# - tensorflow==1.13.1 (requires Python <=3.7)
+# - keras==2.2.4 (old version, now part of TensorFlow)
+# - dm-sonnet==1.29 (DeepMind library, may have specific requirements)
+# - Other ML libraries with specific version constraints
+#
+# For development requiring these specific versions, consider:
+# 1. Using a Python 3.7 virtual environment
+# 2. Installing from requirements.txt directly: pip install -r requirements.txt
+# 3. Using Docker containers with appropriate base images
\ No newline at end of file
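The note above leaves tensorflow==1.13.1 and its companions to a separate, manual install. As a minimal sketch (not part of this patch), a test module could guard TF-dependent tests so the rest of the suite still runs in a plain Poetry environment; the test name is illustrative, and the integration marker comes from the pyproject.toml configuration above:

import pytest

# Skip this module entirely when the separately installed TensorFlow stack is absent.
tf = pytest.importorskip("tensorflow")


@pytest.mark.integration
def test_tensorflow_is_legacy_1x():
    # The pinned requirement named in the note above is tensorflow==1.13.1,
    # i.e. the 1.x API line.
    assert tf.__version__.startswith("1.")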
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..7c209f7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,257 @@
+"""
+Shared pytest fixtures and configuration for all tests.
+"""
+import os
+import tempfile
+import shutil
+from pathlib import Path
+from typing import Generator, Dict, Any
+import pytest
+import numpy as np
+import pandas as pd
+from unittest.mock import Mock, MagicMock
+
+
+@pytest.fixture
+def temp_dir() -> Generator[Path, None, None]:
+    """
+    Create a temporary directory for test files.
+
+    Yields:
+        Path: Path to the temporary directory
+    """
+    temp_path = Path(tempfile.mkdtemp())
+    yield temp_path
+    # Cleanup after test
+    if temp_path.exists():
+        shutil.rmtree(temp_path)
+
+
+@pytest.fixture
+def mock_config() -> Dict[str, Any]:
+    """
+    Provide a mock configuration dictionary for testing.
+
+    Returns:
+        Dict[str, Any]: Mock configuration settings
+    """
+    return {
+        'learning_rate': 0.01,
+        'batch_size': 32,
+        'epochs': 10,
+        'embedding_size': 64,
+        'regularization': 0.001,
+        'validation_split': 0.2,
+        'random_seed': 42
+    }
+
+
+@pytest.fixture
+def sample_data_dir(temp_dir: Path) -> Path:
+    """
+    Create a sample data directory with test files.
+
+    Args:
+        temp_dir: Temporary directory fixture
+
+    Returns:
+        Path: Path to the sample data directory
+    """
+    data_dir = temp_dir / "sample_data"
+    data_dir.mkdir(exist_ok=True)
+
+    # Create some sample files
+    (data_dir / "users.csv").write_text("user_id,age,gender\n1,25,M\n2,30,F\n3,35,M")
+    (data_dir / "items.csv").write_text("item_id,category,price\n1,electronics,100\n2,books,20\n3,electronics,200")
+    (data_dir / "ratings.csv").write_text("user_id,item_id,rating,timestamp\n1,1,5,1000\n1,2,3,1001\n2,3,4,1002")
+
+    return data_dir
+
+
+@pytest.fixture
+def sample_sparse_matrix():
+    """
+    Create a sample sparse matrix for testing recommender systems.
+
+    Returns:
+        scipy.sparse.csr_matrix: Sample user-item interaction matrix
+    """
+    from scipy.sparse import csr_matrix
+
+    # Create a small user-item matrix (5 users x 6 items)
+    data = np.array([5, 3, 4, 5, 2, 1, 4, 3, 5])
+    row_indices = np.array([0, 0, 1, 2, 2, 3, 3, 4, 4])
+    col_indices = np.array([0, 2, 3, 0, 4, 1, 5, 2, 3])
+
+    matrix = csr_matrix((data, (row_indices, col_indices)), shape=(5, 6))
+    return matrix
+
+
+@pytest.fixture
+def sample_dataframe():
+    """
+    Create a sample pandas DataFrame for testing.
+
+    Returns:
+        pd.DataFrame: Sample ratings dataframe
+    """
+    data = {
+        'user_id': [1, 1, 2, 2, 3, 3, 4, 4, 5],
+        'item_id': [1, 3, 4, 1, 5, 2, 6, 3, 4],
+        'rating': [5.0, 3.0, 4.0, 5.0, 2.0, 1.0, 4.0, 3.0, 5.0],
+        'timestamp': [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008]
+    }
+    return pd.DataFrame(data)
+
+
+@pytest.fixture
+def mock_recommender():
+    """
+    Create a mock recommender object for testing.
+
+    Returns:
+        Mock: Mock recommender with basic methods
+    """
+    recommender = Mock()
+    recommender.fit = MagicMock(return_value=None)
+    recommender.predict = MagicMock(return_value=np.array([0.8, 0.6, 0.9, 0.4, 0.7]))
+    recommender.recommend = MagicMock(return_value=(np.array([2, 0, 4]), np.array([0.9, 0.8, 0.7])))
+    recommender.get_item_weights = MagicMock(return_value=np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]))
+    recommender.save_model = MagicMock(return_value=None)
+    recommender.load_model = MagicMock(return_value=None)
+    return recommender
+
+
+@pytest.fixture
+def mock_evaluator():
+    """
+    Create a mock evaluator object for testing.
+
+    Returns:
+        Mock: Mock evaluator with evaluation methods
+    """
+    evaluator = Mock()
+    evaluator.evaluate_recommender = MagicMock(return_value={
+        'precision': 0.75,
+        'recall': 0.68,
+        'f1_score': 0.71,
+        'ndcg': 0.82,
+        'map': 0.79
+    })
+    return evaluator
+
+
+@pytest.fixture
+def mock_data_reader():
+    """
+    Create a mock data reader for testing.
+
+    Returns:
+        Mock: Mock data reader with load methods
+    """
+    reader = Mock()
+    reader.load_data = MagicMock(return_value={
+        'URM_train': Mock(shape=(100, 200)),
+        'URM_test': Mock(shape=(100, 200)),
+        'URM_validation': Mock(shape=(100, 200)),
+        'ICM': Mock(shape=(200, 50)),
+        'UCM': Mock(shape=(100, 30))
+    })
+    return reader
+
+
+@pytest.fixture
+def mock_similarity_matrix():
+    """
+    Create a mock similarity matrix for testing.
+
+    Returns:
+        np.ndarray: Sample similarity matrix
+    """
+    n_items = 6
+    similarity = np.random.rand(n_items, n_items)
+    # Make it symmetric
+    similarity = (similarity + similarity.T) / 2
+    # Set diagonal to 1
+    np.fill_diagonal(similarity, 1.0)
+    return similarity
+
+
+@pytest.fixture(autouse=True)
+def reset_random_seed():
+    """
+    Automatically reset random seeds before each test for reproducibility.
+    """
+    np.random.seed(42)
+    import random
+    random.seed(42)
+
+    # If tensorflow is available, set its seed too
+    try:
+        import tensorflow as tf
+        try:
+            tf.random.set_seed(42)  # TensorFlow 2.x API
+        except AttributeError:
+            tf.set_random_seed(42)  # TensorFlow 1.x API (e.g. the pinned 1.13.1)
+    except ImportError:
+        pass
+
+
+@pytest.fixture
+def capture_logs(caplog):
+    """
+    Fixture to capture log messages during tests.
+
+    Args:
+        caplog: pytest's built-in log capture fixture
+
+    Returns:
+        caplog: Configured log capture
+    """
+    caplog.set_level("DEBUG")
+    return caplog
+
+
+@pytest.fixture
+def mock_model_checkpoint(temp_dir: Path):
+    """
+    Create a mock model checkpoint file.
+
+    Args:
+        temp_dir: Temporary directory fixture
+
+    Returns:
+        Path: Path to the mock checkpoint file
+    """
+    checkpoint_path = temp_dir / "model_checkpoint.pkl"
+
+    # Create a simple mock checkpoint
+    import pickle
+    checkpoint_data = {
+        'model_state': {'layer1': np.random.rand(10, 10), 'layer2': np.random.rand(10, 5)},
+        'optimizer_state': {'learning_rate': 0.01, 'iteration': 1000},
+        'metrics': {'train_loss': 0.05, 'val_loss': 0.08}
+    }
+
+    with open(checkpoint_path, 'wb') as f:
+        pickle.dump(checkpoint_data, f)
+
+    return checkpoint_path
+
+
+@pytest.fixture
+def environment_variables():
+    """
+    Temporarily set environment variables for testing.
+
+    Any variables set on os.environ during the test are rolled back
+    once the test finishes.
+
+    Yields:
+        Dict[str, str]: The live os.environ mapping for the test to modify
+    """
+    original_env = os.environ.copy()
+
+    yield os.environ
+
+    # Restore the original environment after the test
+    os.environ.clear()
+    os.environ.update(original_env)
\ No newline at end of file
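conftest.py above wires up mock recommenders, evaluators, and small sample datasets. As a rough sketch of how these fixtures are meant to compose in a test (the test name is illustrative, the metric keys simply mirror the mocked return value of evaluate_recommender, and no real model is involved):

import pytest


@pytest.mark.unit
def test_mocked_evaluation_pipeline(sample_sparse_matrix, mock_recommender, mock_evaluator):
    # "Fit" the mock on the 5x6 user-item matrix and request recommendations.
    mock_recommender.fit(sample_sparse_matrix)
    items, scores = mock_recommender.recommend(user_id=0)
    assert len(items) == len(scores)

    # The mock evaluator returns fixed metrics; only the contract is exercised.
    metrics = mock_evaluator.evaluate_recommender(mock_recommender, sample_sparse_matrix)
    assert {"precision", "recall", "ndcg"} <= set(metrics)
    assert all(0.0 <= value <= 1.0 for value in metrics.values())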
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_infrastructure_validation.py b/tests/test_infrastructure_validation.py
new file mode 100644
index 0000000..d9067da
--- /dev/null
+++ b/tests/test_infrastructure_validation.py
@@ -0,0 +1,161 @@
+"""
+Validation tests to ensure the testing infrastructure is properly set up.
+"""
+import pytest
+import sys
+import os
+from pathlib import Path
+import numpy as np
+import pandas as pd
+
+
+class TestInfrastructureValidation:
+    """Test suite to validate the testing infrastructure setup."""
+
+    def test_pytest_is_importable(self):
+        """Test that pytest is properly installed and importable."""
+        import pytest
+        assert pytest.__version__
+
+    def test_pytest_cov_is_importable(self):
+        """Test that pytest-cov is properly installed and importable."""
+        import pytest_cov
+        assert pytest_cov.__version__
+
+    def test_pytest_mock_is_importable(self):
+        """Test that pytest-mock is properly installed and importable."""
+        import pytest_mock
+        # pytest_mock doesn't expose __version__, just check it's importable
+        assert pytest_mock is not None
+
+    def test_project_root_in_path(self):
+        """Test that the project root is in Python path."""
+        project_root = Path(__file__).parent.parent
+        assert str(project_root) in sys.path or str(project_root.absolute()) in sys.path
+
+    def test_temp_dir_fixture(self, temp_dir):
+        """Test that the temp_dir fixture works correctly."""
+        assert temp_dir.exists()
+        assert temp_dir.is_dir()
+
+        # Test file creation in temp dir
+        test_file = temp_dir / "test.txt"
+        test_file.write_text("test content")
+        assert test_file.exists()
+        assert test_file.read_text() == "test content"
+
+    def test_mock_config_fixture(self, mock_config):
+        """Test that the mock_config fixture provides expected structure."""
+        assert isinstance(mock_config, dict)
+        assert 'learning_rate' in mock_config
+        assert 'batch_size' in mock_config
+        assert 'epochs' in mock_config
+        assert mock_config['random_seed'] == 42
+
+    def test_sample_data_dir_fixture(self, sample_data_dir):
+        """Test that the sample_data_dir fixture creates expected files."""
+        assert sample_data_dir.exists()
+        assert (sample_data_dir / "users.csv").exists()
+        assert (sample_data_dir / "items.csv").exists()
+        assert (sample_data_dir / "ratings.csv").exists()
+
+        # Check content
+        users_df = pd.read_csv(sample_data_dir / "users.csv")
+        assert len(users_df) == 3
+        assert list(users_df.columns) == ['user_id', 'age', 'gender']
+
+    def test_sample_sparse_matrix_fixture(self, sample_sparse_matrix):
+        """Test that the sample_sparse_matrix fixture works correctly."""
+        from scipy.sparse import issparse
+
+        assert issparse(sample_sparse_matrix)
+        assert sample_sparse_matrix.shape == (5, 6)
+        assert sample_sparse_matrix.nnz == 9  # number of non-zero elements
+
+    def test_sample_dataframe_fixture(self, sample_dataframe):
+        """Test that the sample_dataframe fixture provides expected data."""
+        assert isinstance(sample_dataframe, pd.DataFrame)
+        assert len(sample_dataframe) == 9
+        assert list(sample_dataframe.columns) == ['user_id', 'item_id', 'rating', 'timestamp']
+        assert sample_dataframe['rating'].min() >= 1.0
+        assert sample_dataframe['rating'].max() <= 5.0
+
+    def test_mock_recommender_fixture(self, mock_recommender):
+        """Test that the mock_recommender fixture has expected methods."""
+        assert hasattr(mock_recommender, 'fit')
+        assert hasattr(mock_recommender, 'predict')
+        assert hasattr(mock_recommender, 'recommend')
+
+        # Test method calls
+        mock_recommender.fit(None)
+        predictions = mock_recommender.predict(None)
+        assert isinstance(predictions, np.ndarray)
+
+        items, scores = mock_recommender.recommend(None)
+        assert isinstance(items, np.ndarray)
+        assert isinstance(scores, np.ndarray)
+
+    def test_capture_logs_fixture(self, capture_logs):
+        """Test that log capturing works correctly."""
+        import logging
+
+        logger = logging.getLogger(__name__)
+        logger.info("Test info message")
+        logger.debug("Test debug message")
+
+        assert "Test info message" in capture_logs.text
+        assert "Test debug message" in capture_logs.text
+
+    @pytest.mark.parametrize("marker", ["unit", "integration", "slow"])
+    def test_custom_markers_defined(self, marker):
+        """Test that custom pytest markers are properly defined."""
+        # Custom markers should be usable without error
+        try:
+            getattr(pytest.mark, marker)
+        except AttributeError:
+            pytest.fail(f"Marker '{marker}' is not defined")
+
+    def test_coverage_configuration(self):
+        """Test that coverage is properly configured."""
+        # This test verifies that the test can run with coverage
+        # The actual coverage configuration is tested by running pytest with coverage
+        import coverage
+        assert coverage.__version__
+
+    @pytest.mark.unit
+    def test_unit_marker(self):
+        """Test that unit test marker works."""
+        assert True
+
+    @pytest.mark.integration
+    def test_integration_marker(self):
+        """Test that integration test marker works."""
+        assert True
+
+    @pytest.mark.slow
+    def test_slow_marker(self):
+        """Test that slow test marker works."""
+        import time
+        time.sleep(0.1)  # Simulate slow test
+        assert True
+
+
+class TestPytestCommands:
+    """Test that pytest commands are properly configured."""
+
+    def test_pytest_runs_from_project_root(self):
+        """Test that pytest can be run from project root."""
+        project_root = Path(__file__).parent.parent
+        assert project_root.exists()
+        assert (project_root / "pyproject.toml").exists()
+        assert (project_root / "tests").exists()
+
+    def test_test_discovery_patterns(self):
+        """Test that test files follow naming conventions."""
+        test_file = Path(__file__)
+        assert test_file.name.startswith("test_") or test_file.name.endswith("_test.py")
+
+
+if __name__ == "__main__":
+    # Allow running this file directly for quick validation
+    pytest.main([__file__, "-v"])
\ No newline at end of file
diff --git a/tests/test_simple_validation.py b/tests/test_simple_validation.py
new file mode 100644
index 0000000..136c825
--- /dev/null
+++ b/tests/test_simple_validation.py
@@ -0,0 +1,36 @@
+"""
+Simple validation test to ensure coverage reporting works.
+""" + + +def add_numbers(a: int, b: int) -> int: + """Add two numbers together.""" + return a + b + + +def multiply_numbers(a: int, b: int) -> int: + """Multiply two numbers together.""" + return a * b + + +def divide_numbers(a: int, b: int) -> float: + """Divide two numbers.""" + if b == 0: + raise ValueError("Cannot divide by zero") + return a / b + + +class TestSimpleValidation: + """Test basic functionality to validate coverage.""" + + def test_add_numbers(self): + """Test addition function.""" + assert add_numbers(2, 3) == 5 + assert add_numbers(-1, 1) == 0 + assert add_numbers(0, 0) == 0 + + def test_multiply_numbers(self): + """Test multiplication function.""" + assert multiply_numbers(2, 3) == 6 + assert multiply_numbers(-2, 3) == -6 + assert multiply_numbers(0, 100) == 0 \ No newline at end of file diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29