From b9c1921caf93d8a4c6c5ac375df2e5cd7866407b Mon Sep 17 00:00:00 2001 From: Amine Date: Fri, 11 Jul 2025 20:19:51 +0100 Subject: [PATCH 1/5] =?UTF-8?q?feat:=20add=20AI=20module=20for=20LLM=20int?= =?UTF-8?q?eraction=20and=20a=20=20heuristic=20for=20checking=20code?= =?UTF-8?q?=E2=80=93docstring=20consistency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Amine --- src/macaron/ai.py | 175 ++++++++++++++++++ src/macaron/config/defaults.ini | 14 ++ .../pypi_heuristics/heuristics.py | 3 + .../sourcecode/matching_docstrings.py | 101 ++++++++++ .../slsa_analyzer/build_tool/gradle.py | 89 +++++++++ src/macaron/slsa_analyzer/build_tool/maven.py | 68 +++++++ src/macaron/slsa_analyzer/build_tool/pip.py | 5 + .../slsa_analyzer/build_tool/poetry.py | 5 + .../checks/detect_malicious_metadata_check.py | 12 +- .../pypi/test_matching_docstrings.py | 103 +++++++++++ 10 files changed, 572 insertions(+), 3 deletions(-) create mode 100644 src/macaron/ai.py create mode 100644 src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py create mode 100644 tests/malware_analyzer/pypi/test_matching_docstrings.py diff --git a/src/macaron/ai.py b/src/macaron/ai.py new file mode 100644 index 000000000..eb48ba08b --- /dev/null +++ b/src/macaron/ai.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides a client for interacting with a Large Language Model (LLM).""" + +import json +import logging +import re +from typing import Any, TypeVar + +from pydantic import BaseModel, ValidationError + +from macaron.config.defaults import defaults +from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError +from macaron.util import send_post_http_raw + +logger: logging.Logger = logging.getLogger(__name__) + +T = TypeVar("T", bound=BaseModel) + + +class AIClient: + """A client for interacting with a Large Language Model.""" + + def __init__(self, system_prompt: str): + """ + Initialize the AI client. + + The LLM configuration (enabled, API key, endpoint, model) is read from defaults. + """ + self.enabled, self.api_endpoint, self.api_key, self.model, self.context_window = self._load_defaults() + self.system_prompt = system_prompt.strip() or "You are a helpful AI assistant." 
+ logger.info("AI client is %s.", "enabled" if self.enabled else "disabled") + + def _load_defaults(self) -> tuple[bool, str, str, str, int]: + """Load the LLM configuration from the defaults.""" + section_name = "llm" + enabled, api_key, api_endpoint, model, context_window = False, "", "", "", 10000 + + if defaults.has_section(section_name): + section = defaults[section_name] + enabled = section.get("enabled", "False").strip().lower() == "true" + api_key = section.get("api_key", "").strip() + api_endpoint = section.get("api_endpoint", "").strip() + model = section.get("model", "").strip() + context_window = section.getint("context_window", 10000) + + if enabled: + if not api_key: + raise ConfigurationError("API key for the AI client is not configured.") + if not api_endpoint: + raise ConfigurationError("API endpoint for the AI client is not configured.") + if not model: + raise ConfigurationError("Model for the AI client is not configured.") + + return enabled, api_endpoint, api_key, model, context_window + + def _validate_response(self, response_text: str, response_model: type[T]) -> T: + """ + Validate and parse the response from the LLM. + + If raw JSON parsing fails, attempts to extract a JSON object from text. + + Parameters + ---------- + response_text: str + The response text from the LLM. + response_model: Type[T] + The Pydantic model to validate the response against. + + Returns + ------- + bool + The validated Pydantic model instance. + + Raises + ------ + HeuristicAnalyzerValueError + If there is an error in parsing or validating the response. + """ + try: + data = json.loads(response_text) + except json.JSONDecodeError: + logger.debug("Full JSON parse failed; trying to extract JSON from text.") + # If the response is not a valid JSON, try to extract a JSON object from the text. + match = re.search(r"\{.*\}", response_text, re.DOTALL) + if not match: + raise HeuristicAnalyzerValueError("No JSON object found in the LLM response.") from match + try: + data = json.loads(match.group(0)) + except json.JSONDecodeError as e: + logger.error("Failed to parse extracted JSON: %s", e) + raise HeuristicAnalyzerValueError("Invalid JSON extracted from response.") from e + + try: + return response_model.model_validate(data) + except ValidationError as e: + logger.error("Validation failed against response model: %s", e) + raise HeuristicAnalyzerValueError("Response JSON validation failed.") from e + + def invoke( + self, + user_prompt: str, + temperature: float = 0.2, + max_tokens: int = 4000, + structured_output: type[T] | None = None, + timeout: int = 30, + ) -> Any: + """ + Invoke the LLM and optionally validate its response. + + Parameters + ---------- + user_prompt: str + The user prompt to send to the LLM. + temperature: float + The temperature for the LLM response. + max_tokens: int + The maximum number of tokens for the LLM response. + structured_output: Optional[Type[T]] + The Pydantic model to validate the response against. If provided, the response will be parsed and validated. + timeout: int + The timeout for the HTTP request in seconds. + + Returns + ------- + Optional[T | str] + The validated Pydantic model instance if `structured_output` is provided, + or the raw string response if not. + + Raises + ------ + HeuristicAnalyzerValueError + If there is an error in parsing or validating the response. + """ + if not self.enabled: + raise ConfigurationError("AI client is not enabled. 
Please check your configuration.") + + if len(user_prompt.split()) > self.context_window: + logger.warning( + "User prompt exceeds context window (%s words). " + "Truncating the prompt to fit within the context window.", + self.context_window, + ) + user_prompt = " ".join(user_prompt.split()[: self.context_window]) + + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} + payload = { + "model": self.model, + "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}], + "temperature": temperature, + "max_tokens": max_tokens, + } + + try: + response = send_post_http_raw(url=self.api_endpoint, json_data=payload, headers=headers, timeout=timeout) + if not response: + raise HeuristicAnalyzerValueError("No response received from the LLM.") + response_json = response.json() + usage = response_json.get("usage", {}) + + if usage: + usage_str = ", ".join(f"{key} = {value}" for key, value in usage.items()) + logger.info("LLM call token usage: %s", usage_str) + + message_content = response_json["choices"][0]["message"]["content"] + + if not structured_output: + logger.debug("Returning raw message content (no structured output requested).") + return message_content + return self._validate_response(message_content, structured_output) + + except Exception as e: + logger.error("Error during LLM invocation: %s", e) + raise HeuristicAnalyzerValueError(f"Failed to get or validate LLM response: {e}") from e diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 8aa5e7a11..0d43d46bc 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -632,3 +632,17 @@ custom_semgrep_rules_path = # .yaml prefix. Note, this will be ignored if a path to custom semgrep rules is not provided. This list may not contain # duplicated elements, meaning that ruleset names must be unique. disabled_custom_rulesets = + +[llm] +# The LLM configuration for Macaron. +# If enabled, the LLM will be used to analyze the results and provide insights. +enabled = +# The API key for the LLM service. +api_key = +# The API endpoint for the LLM service. +api_endpoint = +# The model to use for the LLM service. +model = +# The context window size for the LLM service. +# This is the maximum number of tokens that the LLM can process in a single request. +context_window = 10000 diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index 1f1fdbf2e..0286cda2c 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -49,6 +49,9 @@ class Heuristics(str, Enum): #: Indicates that the package has a similar structure to other packages maintained by the same user. SIMILAR_PROJECTS = "similar_projects" + #: Indicates that the package contains some code that doesn't match the docstrings. + MATCHING_DOCSTRINGS = "matching_docstrings" + class HeuristicResult(str, Enum): """Result type indicating the outcome of a heuristic.""" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py new file mode 100644 index 000000000..ca9cafbe3 --- /dev/null +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py @@ -0,0 +1,101 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This analyzer checks the consistency of code with its docstrings."""
+
+import logging
+import time
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from macaron.ai import AIClient
+from macaron.json_tools import JsonType
+from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
+from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
+from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class Result(BaseModel):
+    """The result after analysing the code with its docstrings."""
+
+    decision: Literal["consistent", "inconsistent"] = Field(
+        description="""The final decision after analysing the code with its docstrings.
+        It can be either 'consistent' or 'inconsistent'."""
+    )
+    reason: str = Field(
+        description="The reason for the decision made. It should be a short sentence explaining the decision."
+    )
+    inconsistent_code_part: str | None = Field(
+        default=None,
+        description="""The specific part of the code that is inconsistent with the docstring.
+        Empty if the decision is 'consistent'.""",
+    )
+
+
+class MatchingDocstringsAnalyzer(BaseHeuristicAnalyzer):
+    """Check whether the docstrings and the code components are consistent."""
+
+    SYSTEM_PROMPT = """
+    You are a code master who can detect inconsistencies between code and the docstrings that describe its components.
+    You will be given a python code file. Your task is to determine whether the code is consistent with the docstrings.
+    Wrap the output in `json` tags.
+    Your response must be a JSON object matching this schema:
+    {
+        "decision": "'consistent' or 'inconsistent'",
+        "reason": "A short explanation.",
+        "inconsistent_code_part": "The inconsistent code, or null."
+    }
+
+    /no_think
+    """
+
+    REQUEST_INTERVAL = 0.5
+
+    def __init__(self) -> None:
+        super().__init__(
+            name="matching_docstrings_analyzer",
+            heuristic=Heuristics.MATCHING_DOCSTRINGS,
+            depends_on=None,
+        )
+        self.client = AIClient(system_prompt=self.SYSTEM_PROMPT.strip())
+
+    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
+        """Analyze the package.
+
+        Parameters
+        ----------
+        pypi_package_json: PyPIPackageJsonAsset
+            The PyPI package JSON asset object.
+
+        Returns
+        -------
+        tuple[HeuristicResult, dict[str, JsonType]]:
+            The result and related information collected during the analysis.
+        """
+        if not self.client.enabled:
+            logger.warning("AI client is not enabled, skipping the matching docstrings analysis.")
+            return HeuristicResult.SKIP, {}
+
+        download_result = pypi_package_json.download_sourcecode()
+        if not download_result:
+            logger.warning("No source code found for the package, skipping the matching docstrings analysis.")
+            return HeuristicResult.SKIP, {}
+
+        for file, content in pypi_package_json.iter_sourcecode():
+            if file.endswith(".py"):
+                time.sleep(self.REQUEST_INTERVAL)  # Respect the request interval to avoid rate limiting.
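+                # Decode leniently: files inside a published sdist are not guaranteed to be
+                # valid UTF-8, and a strict decode would abort the scan on a single bad file.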
+                code_str = content.decode("utf-8", "ignore")
+                analysis_result = self.client.invoke(
+                    user_prompt=code_str,
+                    structured_output=Result,
+                )
+                if analysis_result and analysis_result.decision == "inconsistent":
+                    return HeuristicResult.FAIL, {
+                        "file": file,
+                        "reason": analysis_result.reason,
+                        "inconsistent part": analysis_result.inconsistent_code_part or "",
+                    }
+        return HeuristicResult.PASS, {}
diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py
index bd316dd30..c67f428cd 100644
--- a/src/macaron/slsa_analyzer/build_tool/gradle.py
+++ b/src/macaron/slsa_analyzer/build_tool/gradle.py
@@ -69,6 +69,96 @@ def is_detected(self, repo_path: str) -> bool:
         gradle_config_files = self.build_configs + self.entry_conf
         return any(file_exists(repo_path, file) for file in gradle_config_files)
 
+    def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool:
+        """Prepare the necessary wrapper files for running the build.
+
+        This method will return False if any errors happen during the operation.
+
+        Parameters
+        ----------
+        wrapper_path : str
+            The path where all necessary wrapper files are located.
+        build_dir : str
+            The path of the build dir. This is where all files are copied to.
+
+        Returns
+        -------
+        bool
+            True if the copy succeeds; False otherwise.
+        """
+        # The path of the needed wrapper files
+        wrapper_files = self.wrapper_files
+
+        if copy_file_bulk(wrapper_files, wrapper_path, build_dir):
+            # Ensure that gradlew is executable.
+            file_path = os.path.join(build_dir, "gradlew")
+            status = os.stat(file_path)
+            if oct(status.st_mode)[-3:] != "744":
+                logger.debug("%s does not have 744 permission. Changing it to 744.", file_path)
+                os.chmod(file_path, 0o744)
+            return True
+
+        return False
+
+    def get_dep_analyzer(self) -> CycloneDxGradle:
+        """Create a DependencyAnalyzer for the Gradle build tool.
+
+        Returns
+        -------
+        CycloneDxGradle
+            The CycloneDxGradle object.
+
+        Raises
+        ------
+        DependencyAnalyzerError
+        """
+        if "dependency.resolver" not in defaults or "dep_tool_gradle" not in defaults["dependency.resolver"]:
+            raise DependencyAnalyzerError("No default dependency analyzer is found.")
+        if not DependencyAnalyzer.tool_valid(defaults.get("dependency.resolver", "dep_tool_gradle")):
+            raise DependencyAnalyzerError(
+                f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.",
+            )
+
+        tool_name, tool_version = tuple(
+            defaults.get(
+                "dependency.resolver",
+                "dep_tool_gradle",
+                fallback="cyclonedx-gradle:1.7.3",
+            ).split(":")
+        )
+        if tool_name == DependencyTools.CYCLONEDX_GRADLE:
+            return CycloneDxGradle(
+                resources_path=global_config.resources_path,
+                file_name="bom.json",
+                tool_name=tool_name,
+                tool_version=tool_version,
+            )
+
+        raise DependencyAnalyzerError(f"Unsupported SBOM generator for Gradle: {tool_name}.")
+
+    def get_gradle_exec(self, repo_path: str) -> str:
+        """Get the Gradle executable for the repo.
+
+        Parameters
+        ----------
+        repo_path: str
+            The absolute path to a repository containing Gradle projects.
+
+        Returns
+        -------
+        str
+            The absolute path to the Gradle executable.
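+            Prefers the repository's own gradlew wrapper when present and executable;
+            otherwise falls back to the gradlew bundled with Macaron's resources.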
+ """ + # We try to use the gradlew that comes with the repository first. + repo_gradlew = os.path.join(repo_path, "gradlew") + if os.path.isfile(repo_gradlew) and os.access(repo_gradlew, os.X_OK): + return repo_gradlew + + # We use Macaron's built-in gradlew as a fallback option. + return os.path.join(os.path.join(macaron.MACARON_PATH, "resources"), "gradlew") + def get_group_id(self, gradle_exec: str, project_path: str) -> str | None: """Get the group id of a Gradle project. diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index 0e89849af..922fb7b71 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -64,3 +64,71 @@ def is_detected(self, repo_path: str) -> bool: return False maven_config_files = self.build_configs return any(file_exists(repo_path, file) for file in maven_config_files) + + def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: + """Prepare the necessary wrapper files for running the build. + + This method will return False if there is any errors happened during operation. + + Parameters + ---------- + wrapper_path : str + The path where all necessary wrapper files are located. + build_dir : str + The path of the build dir. This is where all files are copied to. + + Returns + ------- + bool + True if succeed else False. + """ + # The path of the needed wrapper files + wrapper_files = self.wrapper_files + + if copy_file_bulk(wrapper_files, wrapper_path, build_dir): + # Ensure that mvnw is executable. + file_path = os.path.join(build_dir, "mvnw") + status = os.stat(file_path) + if oct(status.st_mode)[-3:] != "744": + logger.debug("%s does not have 744 permission. Changing it to 744.") + os.chmod(file_path, 0o744) + return True + + return False + + def get_dep_analyzer(self) -> CycloneDxMaven: + """ + Create a DependencyAnalyzer for the Maven build tool. + + Returns + ------- + CycloneDxMaven + The CycloneDxMaven object. + + Raises + ------ + DependencyAnalyzerError + """ + if "dependency.resolver" not in defaults or "dep_tool_maven" not in defaults["dependency.resolver"]: + raise DependencyAnalyzerError("No default dependency analyzer is found.") + if not DependencyAnalyzer.tool_valid(defaults.get("dependency.resolver", "dep_tool_maven")): + raise DependencyAnalyzerError( + f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_maven')} is not valid.", + ) + + tool_name, tool_version = tuple( + defaults.get( + "dependency.resolver", + "dep_tool_maven", + fallback="cyclonedx-maven:2.6.2", + ).split(":") + ) + if tool_name == DependencyTools.CYCLONEDX_MAVEN: + return CycloneDxMaven( + resources_path=global_config.resources_path, + file_name="bom.json", + tool_name=tool_name, + tool_version=tool_version, + ) + + raise DependencyAnalyzerError(f"Unsupported SBOM generator for Maven: {tool_name}.") diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 1926ca33b..073380ec2 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -64,6 +64,11 @@ def get_dep_analyzer(self) -> DependencyAnalyzer: DependencyAnalyzer The DependencyAnalyzer object. 
""" + tool_name = "cyclonedx_py" + if not DependencyAnalyzer.tool_valid(f"{tool_name}:{cyclonedx_version}"): + raise DependencyAnalyzerError( + f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.", + ) return CycloneDxPython( resources_path=global_config.resources_path, file_name="python_sbom.json", diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index a1d5a4a0d..3e928dfca 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -102,6 +102,11 @@ def get_dep_analyzer(self) -> DependencyAnalyzer: DependencyAnalyzer The DependencyAnalyzer object. """ + tool_name = "cyclonedx_py" + if not DependencyAnalyzer.tool_valid(f"{tool_name}:{cyclonedx_version}"): + raise DependencyAnalyzerError( + f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.", + ) return CycloneDxPython( resources_path=global_config.resources_path, file_name="python_sbom.json", diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 9f09362a4..a09289713 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -28,6 +28,7 @@ from macaron.malware_analyzer.pypi_heuristics.metadata.typosquatting_presence import TyposquattingPresenceAnalyzer from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer +from macaron.malware_analyzer.pypi_heuristics.sourcecode.matching_docstrings import MatchingDocstringsAnalyzer from macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer import PyPISourcecodeAnalyzer from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer from macaron.slsa_analyzer.analyze_context import AnalyzeContext @@ -366,6 +367,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: TyposquattingPresenceAnalyzer, FakeEmailAnalyzer, SimilarProjectAnalyzer, + MatchingDocstringsAnalyzer, ] # name used to query the result of all problog rules, so it can be accessed outside the model. @@ -445,6 +447,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: failed({Heuristics.SIMILAR_PROJECTS.value}), failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), failed({Heuristics.FAKE_EMAIL.value}). + % Package released with a name similar to a popular package. + {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_3) :- + quickUndetailed, forceSetup, failed({Heuristics.MATCHING_DOCSTRINGS.value}). + % ----- Evaluation ----- % Aggregate result @@ -452,10 +458,10 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: {problog_result_access} :- trigger(malware_high_confidence_2). {problog_result_access} :- trigger(malware_high_confidence_3). {problog_result_access} :- trigger(malware_high_confidence_4). - {problog_result_access} :- trigger(malware_medium_confidence_1). - {problog_result_access} :- trigger(malware_medium_confidence_2). - {problog_result_access} :- trigger(malware_medium_confidence_3). {problog_result_access} :- trigger(malware_medium_confidence_4). + {problog_result_access} :- trigger(malware_medium_confidence_3). + {problog_result_access} :- trigger(malware_medium_confidence_2). 
+ {problog_result_access} :- trigger(malware_medium_confidence_1). query({problog_result_access}). % Explainability diff --git a/tests/malware_analyzer/pypi/test_matching_docstrings.py b/tests/malware_analyzer/pypi/test_matching_docstrings.py new file mode 100644 index 000000000..c427fa6f9 --- /dev/null +++ b/tests/malware_analyzer/pypi/test_matching_docstrings.py @@ -0,0 +1,103 @@ +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Tests for the MatchingDocstringsAnalyzer heuristic.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult +from macaron.malware_analyzer.pypi_heuristics.sourcecode.matching_docstrings import MatchingDocstringsAnalyzer, Result + + +@pytest.fixture(name="analyzer") +def analyzer_() -> MatchingDocstringsAnalyzer: + """Pytest fixture to create a MatchingDocstringsAnalyzer instance.""" + return MatchingDocstringsAnalyzer() + + +@pytest.fixture(autouse=True) +def skip_if_client_disabled(analyzer: MatchingDocstringsAnalyzer) -> None: + """ + Automatically skip tests in this file if the AI client is disabled. + """ + if not analyzer.client.enabled: + pytest.skip("AI client disabled - skipping test") + + +def test_analyze_consistent_docstrings_pass(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer passes when docstrings are consistent with the code.""" + pypi_package_json.download_sourcecode.return_value = True + pypi_package_json.iter_sourcecode.return_value = [("test.py", b"def func():\n '''docstring'''\n pass")] + + mock_result = Result(decision="consistent", reason="The code is consistent with the docstring.") + + with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert not info + mock_invoke.assert_called_once() + + +def test_analyze_inconsistent_docstrings_fail( + analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer fails when docstrings are inconsistent with the code.""" + pypi_package_json.download_sourcecode.return_value = True + pypi_package_json.iter_sourcecode.return_value = [ + ("test.py", b"def func():\n '''docstring'''\n print('hello')") + ] + + mock_result = Result( + decision="inconsistent", + reason="The docstring does not mention the print statement.", + inconsistent_code_part="print('hello')", + ) + + with patch.object(analyzer.client, "invoke", return_value=mock_result): + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL + assert info["file"] == "test.py" + assert info["reason"] == "The docstring does not mention the print statement." 
+ assert info["inconsistent part"] == "print('hello')" + + +def test_analyze_ai_client_disabled_skip(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer skips when the AI client is disabled.""" + with patch.object(analyzer.client, "enabled", False): + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + assert not info + + +def test_analyze_no_source_code_skip(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer skips if the source code cannot be downloaded.""" + pypi_package_json.download_sourcecode.return_value = False + with patch.object(analyzer.client, "invoke") as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + assert not info + mock_invoke.assert_not_called() + + +def test_analyze_no_python_files_pass(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer passes if there are no Python files in the source code.""" + pypi_package_json.download_sourcecode.return_value = True + pypi_package_json.iter_sourcecode.return_value = [("README.md", b"This is a test package.")] + with patch.object(analyzer.client, "invoke") as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert not info + mock_invoke.assert_not_called() + + +def test_analyze_llm_invocation_error_pass(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer passes if the LLM invocation returns None (e.g., on API error).""" + pypi_package_json.download_sourcecode.return_value = True + pypi_package_json.iter_sourcecode.return_value = [("test.py", b"def func():\n pass")] + + with patch.object(analyzer.client, "invoke", return_value=None): + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert not info From 65e54a16b2cd4d78c0ccc0ea9e21de06f7e82c61 Mon Sep 17 00:00:00 2001 From: Amine Date: Thu, 24 Jul 2025 10:52:16 +0100 Subject: [PATCH 2/5] feat(ai): improve robustness of AI client Signed-off-by: Amine --- pyproject.toml | 1 + src/macaron/ai.py | 175 ------------------ src/macaron/ai/README.md | 50 +++++ src/macaron/ai/__init__.py | 2 + src/macaron/ai/ai_client.py | 53 ++++++ src/macaron/ai/ai_factory.py | 70 +++++++ src/macaron/ai/ai_tools.py | 53 ++++++ src/macaron/ai/openai_client.py | 100 ++++++++++ src/macaron/config/defaults.ini | 6 +- .../sourcecode/matching_docstrings.py | 13 +- .../pypi/test_matching_docstrings.py | 10 +- 11 files changed, 344 insertions(+), 189 deletions(-) delete mode 100644 src/macaron/ai.py create mode 100644 src/macaron/ai/README.md create mode 100644 src/macaron/ai/__init__.py create mode 100644 src/macaron/ai/ai_client.py create mode 100644 src/macaron/ai/ai_factory.py create mode 100644 src/macaron/ai/ai_tools.py create mode 100644 src/macaron/ai/openai_client.py diff --git a/pyproject.toml b/pyproject.toml index 40d99dcec..b87768cb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "cryptography >=44.0.0,<45.0.0", "semgrep == 1.113.0", "email-validator >=2.2.0,<3.0.0", + "pydantic >= 2.11.5,<2.12.0", ] keywords = [] # https://pypi.org/classifiers/ diff --git a/src/macaron/ai.py b/src/macaron/ai.py deleted file mode 100644 index eb48ba08b..000000000 --- a/src/macaron/ai.py +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. 
All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module provides a client for interacting with a Large Language Model (LLM).""" - -import json -import logging -import re -from typing import Any, TypeVar - -from pydantic import BaseModel, ValidationError - -from macaron.config.defaults import defaults -from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError -from macaron.util import send_post_http_raw - -logger: logging.Logger = logging.getLogger(__name__) - -T = TypeVar("T", bound=BaseModel) - - -class AIClient: - """A client for interacting with a Large Language Model.""" - - def __init__(self, system_prompt: str): - """ - Initialize the AI client. - - The LLM configuration (enabled, API key, endpoint, model) is read from defaults. - """ - self.enabled, self.api_endpoint, self.api_key, self.model, self.context_window = self._load_defaults() - self.system_prompt = system_prompt.strip() or "You are a helpful AI assistant." - logger.info("AI client is %s.", "enabled" if self.enabled else "disabled") - - def _load_defaults(self) -> tuple[bool, str, str, str, int]: - """Load the LLM configuration from the defaults.""" - section_name = "llm" - enabled, api_key, api_endpoint, model, context_window = False, "", "", "", 10000 - - if defaults.has_section(section_name): - section = defaults[section_name] - enabled = section.get("enabled", "False").strip().lower() == "true" - api_key = section.get("api_key", "").strip() - api_endpoint = section.get("api_endpoint", "").strip() - model = section.get("model", "").strip() - context_window = section.getint("context_window", 10000) - - if enabled: - if not api_key: - raise ConfigurationError("API key for the AI client is not configured.") - if not api_endpoint: - raise ConfigurationError("API endpoint for the AI client is not configured.") - if not model: - raise ConfigurationError("Model for the AI client is not configured.") - - return enabled, api_endpoint, api_key, model, context_window - - def _validate_response(self, response_text: str, response_model: type[T]) -> T: - """ - Validate and parse the response from the LLM. - - If raw JSON parsing fails, attempts to extract a JSON object from text. - - Parameters - ---------- - response_text: str - The response text from the LLM. - response_model: Type[T] - The Pydantic model to validate the response against. - - Returns - ------- - bool - The validated Pydantic model instance. - - Raises - ------ - HeuristicAnalyzerValueError - If there is an error in parsing or validating the response. - """ - try: - data = json.loads(response_text) - except json.JSONDecodeError: - logger.debug("Full JSON parse failed; trying to extract JSON from text.") - # If the response is not a valid JSON, try to extract a JSON object from the text. 
- match = re.search(r"\{.*\}", response_text, re.DOTALL) - if not match: - raise HeuristicAnalyzerValueError("No JSON object found in the LLM response.") from match - try: - data = json.loads(match.group(0)) - except json.JSONDecodeError as e: - logger.error("Failed to parse extracted JSON: %s", e) - raise HeuristicAnalyzerValueError("Invalid JSON extracted from response.") from e - - try: - return response_model.model_validate(data) - except ValidationError as e: - logger.error("Validation failed against response model: %s", e) - raise HeuristicAnalyzerValueError("Response JSON validation failed.") from e - - def invoke( - self, - user_prompt: str, - temperature: float = 0.2, - max_tokens: int = 4000, - structured_output: type[T] | None = None, - timeout: int = 30, - ) -> Any: - """ - Invoke the LLM and optionally validate its response. - - Parameters - ---------- - user_prompt: str - The user prompt to send to the LLM. - temperature: float - The temperature for the LLM response. - max_tokens: int - The maximum number of tokens for the LLM response. - structured_output: Optional[Type[T]] - The Pydantic model to validate the response against. If provided, the response will be parsed and validated. - timeout: int - The timeout for the HTTP request in seconds. - - Returns - ------- - Optional[T | str] - The validated Pydantic model instance if `structured_output` is provided, - or the raw string response if not. - - Raises - ------ - HeuristicAnalyzerValueError - If there is an error in parsing or validating the response. - """ - if not self.enabled: - raise ConfigurationError("AI client is not enabled. Please check your configuration.") - - if len(user_prompt.split()) > self.context_window: - logger.warning( - "User prompt exceeds context window (%s words). " - "Truncating the prompt to fit within the context window.", - self.context_window, - ) - user_prompt = " ".join(user_prompt.split()[: self.context_window]) - - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"} - payload = { - "model": self.model, - "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}], - "temperature": temperature, - "max_tokens": max_tokens, - } - - try: - response = send_post_http_raw(url=self.api_endpoint, json_data=payload, headers=headers, timeout=timeout) - if not response: - raise HeuristicAnalyzerValueError("No response received from the LLM.") - response_json = response.json() - usage = response_json.get("usage", {}) - - if usage: - usage_str = ", ".join(f"{key} = {value}" for key, value in usage.items()) - logger.info("LLM call token usage: %s", usage_str) - - message_content = response_json["choices"][0]["message"]["content"] - - if not structured_output: - logger.debug("Returning raw message content (no structured output requested).") - return message_content - return self._validate_response(message_content, structured_output) - - except Exception as e: - logger.error("Error during LLM invocation: %s", e) - raise HeuristicAnalyzerValueError(f"Failed to get or validate LLM response: {e}") from e diff --git a/src/macaron/ai/README.md b/src/macaron/ai/README.md new file mode 100644 index 000000000..28ddf4757 --- /dev/null +++ b/src/macaron/ai/README.md @@ -0,0 +1,50 @@ +# Macaron AI Module + +This module provides the foundation for interacting with Large Language Models (LLMs) in a provider-agnostic way. 
It includes an abstract client definition, provider-specific client implementations, a client factory, and utility functions for processing responses.

## Module Components

- **ai_client.py**
  Defines the abstract [`AIClient`](./ai_client.py) class. This class handles the initialization of LLM configuration from the defaults and serves as the base for all specific AI client implementations.

- **openai_client.py**
  Implements the [`OpenAiClient`](./openai_client.py) class, a concrete subclass of [`AIClient`](./ai_client.py). This client interacts with OpenAI-like APIs by sending requests over HTTP and processing the responses. It also validates and structures responses using the tools provided.

- **ai_factory.py**
  Contains the [`AIClientFactory`](./ai_factory.py) class, which is responsible for reading provider configuration from the defaults and creating the correct AI client instance.

- **ai_tools.py**
  Offers utility functions such as `structure_response` to assist with parsing and validating the JSON response returned by an LLM. These functions ensure that responses conform to a given Pydantic model for easier downstream processing.

## Usage

1. **Configuration:**
   The module reads the LLM configuration from the application defaults (using the `defaults` module). Make sure that the `llm` section in your configuration includes valid settings such as `enabled`, `provider`, `api_key`, `api_endpoint`, `model`, and `context_window`.

2. **Creating a Client:**
   Use the [`AIClientFactory`](./ai_factory.py) to create an AI client instance. The factory checks the configured provider and returns a client (e.g., an instance of [`OpenAiClient`](./openai_client.py)) that can be used to invoke the LLM. Note that `create_client` returns `None` if the configured provider is not supported.

   Example:
   ```py
   from macaron.ai.ai_factory import AIClientFactory

   factory = AIClientFactory()
   client = factory.create_client(system_prompt="You are a helpful assistant.")
   if client:
       response = client.invoke("Hello, how can you assist me?")
       print(response)
   ```

3. **Response Processing:**
   When a structured response is required, pass a Pydantic model class to the `invoke` method. The [`ai_tools.py`](./ai_tools.py) module takes care of parsing and validating the response to ensure it meets the expected structure.

## Logging and Error Handling

- The module uses Python's logging framework to report important events, such as token usage and warnings when prompts exceed the allowed context window.
- Configuration errors (e.g., missing API key or endpoint) are handled by raising descriptive exceptions such as [`ConfigurationError`](../errors.py).

## Extensibility

The design of the AI module is provider-agnostic. To add support for additional LLM providers:
- Implement a new client by subclassing [`AIClient`](./ai_client.py).
- Add the new client to the [`PROVIDER_MAPPING`](./ai_factory.py).
- Update the configuration defaults accordingly.
diff --git a/src/macaron/ai/__init__.py b/src/macaron/ai/__init__.py
new file mode 100644
index 000000000..8e17a3508
--- /dev/null
+++ b/src/macaron/ai/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
diff --git a/src/macaron/ai/ai_client.py b/src/macaron/ai/ai_client.py
new file mode 100644
index 000000000..35733e5d8
--- /dev/null
+++ b/src/macaron/ai/ai_client.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. 
All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module defines the abstract AIClient class for implementing AI clients."""
+
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, TypeVar
+
+from pydantic import BaseModel
+
+T = TypeVar("T", bound=BaseModel)
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class AIClient(ABC):
+    """This abstract class is used to implement AI clients."""
+
+    def __init__(self, system_prompt: str, defaults: dict) -> None:
+        """
+        Initialize the AI client.
+
+        The LLM configuration is read from defaults.
+        """
+        self.system_prompt = system_prompt
+        self.defaults = defaults
+
+    @abstractmethod
+    def invoke(
+        self,
+        user_prompt: str,
+        temperature: float = 0.2,
+        structured_output: type[T] | None = None,
+    ) -> Any:
+        """
+        Invoke the LLM and optionally validate its response.
+
+        Parameters
+        ----------
+        user_prompt: str
+            The user prompt to send to the LLM.
+        temperature: float
+            The temperature for the LLM response.
+        structured_output: Optional[Type[T]]
+            The Pydantic model to validate the response against. If provided, the response will be parsed and validated.
+
+        Returns
+        -------
+        Optional[T | str]
+            The validated Pydantic model instance if `structured_output` is provided,
+            or the raw string response if not.
+        """
diff --git a/src/macaron/ai/ai_factory.py b/src/macaron/ai/ai_factory.py
new file mode 100644
index 000000000..9462ebf86
--- /dev/null
+++ b/src/macaron/ai/ai_factory.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module defines the AIClientFactory class for creating AI clients based on provider configuration."""
+
+import logging
+
+from macaron.ai.ai_client import AIClient
+from macaron.ai.openai_client import OpenAiClient
+from macaron.config.defaults import defaults
+from macaron.errors import ConfigurationError
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class AIClientFactory:
+    """Factory to create AI clients based on provider configuration."""
+
+    PROVIDER_MAPPING: dict[str, type[AIClient]] = {"openai": OpenAiClient}
+
+    def __init__(self) -> None:
+        """
+        Initialize the AI client factory.
+
+        The LLM configuration is read from defaults.
+ """ + self.defaults = self._load_defaults() + + def _load_defaults(self) -> dict: + section_name = "llm" + default_values = { + "enabled": False, + "provider": "", + "api_key": "", + "api_endpoint": "", + "model": "", + "context_window": 10000, + } + + if defaults.has_section(section_name): + section = defaults[section_name] + default_values["enabled"] = section.getboolean("enabled", default_values["enabled"]) + default_values["api_key"] = str(section.get("api_key", default_values["api_key"])).strip().lower() + default_values["api_endpoint"] = ( + str(section.get("api_endpoint", default_values["api_endpoint"])).strip().lower() + ) + default_values["model"] = str(section.get("model", default_values["model"])).strip().lower() + default_values["provider"] = str(section.get("provider", default_values["provider"])).strip().lower() + default_values["context_window"] = section.getint("context_window", 10000) + + if default_values["enabled"]: + for key, value in default_values.items(): + if not value: + raise ConfigurationError( + f"AI client configuration '{key}' is required but not set in the defaults." + ) + + return default_values + + def create_client(self, system_prompt: str) -> AIClient | None: + """Create an AI client based on the configured provider.""" + client_class = self.PROVIDER_MAPPING.get(self.defaults["provider"]) + if client_class is None: + logger.error("Provider '%s' is not supported.", self.defaults["provider"]) + return None + return client_class(system_prompt, self.defaults) + + def list_available_providers(self) -> list[str]: + """List all registered providers.""" + return list(self.PROVIDER_MAPPING.keys()) diff --git a/src/macaron/ai/ai_tools.py b/src/macaron/ai/ai_tools.py new file mode 100644 index 000000000..e476376f9 --- /dev/null +++ b/src/macaron/ai/ai_tools.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides utility functions for Large Language Model (LLM).""" +import json +import logging +import re +from typing import TypeVar + +from pydantic import BaseModel, ValidationError + +T = TypeVar("T", bound=BaseModel) + +logger: logging.Logger = logging.getLogger(__name__) + + +def structure_response(response_text: str, response_model: type[T]) -> T | None: + """ + Structure and parse the response from the LLM. + + If raw JSON parsing fails, attempts to extract a JSON object from text. + + Parameters + ---------- + response_text: str + The response text from the LLM. + response_model: Type[T] + The Pydantic model to structure the response against. + + Returns + ------- + T | None + The structured Pydantic model instance. + """ + try: + data = json.loads(response_text) + except json.JSONDecodeError: + logger.debug("Full JSON parse failed; trying to extract JSON from text.") + # If the response is not a valid JSON, try to extract a JSON object from the text. 
+    match = re.search(r"\{.*\}", response_text, re.DOTALL)
+    if not match:
+        return None
+    try:
+        data = json.loads(match.group(0))
+    except json.JSONDecodeError as e:
+        logger.debug("Failed to parse extracted JSON: %s", e)
+        return None
+
+    try:
+        return response_model.model_validate(data)
+    except ValidationError as e:
+        logger.debug("Validation failed against response model: %s", e)
+        return None
diff --git a/src/macaron/ai/openai_client.py b/src/macaron/ai/openai_client.py
new file mode 100644
index 000000000..cd856745c
--- /dev/null
+++ b/src/macaron/ai/openai_client.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module provides a client for interacting with an OpenAI-compatible Large Language Model (LLM) API."""
+
+import logging
+from typing import Any, TypeVar
+
+from pydantic import BaseModel
+
+from macaron.ai.ai_client import AIClient
+from macaron.ai.ai_tools import structure_response
+from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError
+from macaron.util import send_post_http_raw
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+T = TypeVar("T", bound=BaseModel)
+
+
+class OpenAiClient(AIClient):
+    """A client for interacting with a Large Language Model served through an OpenAI-compatible API."""
+
+    def invoke(
+        self,
+        user_prompt: str,
+        temperature: float = 0.2,
+        structured_output: type[T] | None = None,
+        max_tokens: int = 4000,
+        timeout: int = 30,
+    ) -> Any:
+        """
+        Invoke the LLM and optionally validate its response.
+
+        Parameters
+        ----------
+        user_prompt: str
+            The user prompt to send to the LLM.
+        temperature: float
+            The temperature for the LLM response.
+        structured_output: Optional[Type[T]]
+            The Pydantic model to validate the response against. If provided, the response will be parsed and validated.
+        max_tokens: int
+            The maximum number of tokens for the LLM response.
+        timeout: int
+            The timeout for the HTTP request in seconds.
+
+        Returns
+        -------
+        Optional[T | str]
+            The validated Pydantic model instance if `structured_output` is provided,
+            or the raw string response if not.
+
+        Raises
+        ------
+        HeuristicAnalyzerValueError
+            If there is an error in parsing or validating the response.
+        """
+        if not self.defaults["enabled"]:
+            raise ConfigurationError("AI client is not enabled. Please check your configuration.")
+
+        if len(user_prompt.split()) > self.defaults["context_window"]:
+            logger.warning(
+                "User prompt exceeds context window (%s words). 
" + "Truncating the prompt to fit within the context window.", + self.defaults["context_window"], + ) + user_prompt = " ".join(user_prompt.split()[: self.defaults["context_window"]]) + + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.defaults["api_key"]}"} + payload = { + "model": self.defaults["model"], + "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}], + "temperature": temperature, + "max_tokens": max_tokens, + } + + try: + response = send_post_http_raw( + url=self.defaults["api_endpoint"], json_data=payload, headers=headers, timeout=timeout + ) + if not response: + raise HeuristicAnalyzerValueError("No response received from the LLM.") + response_json = response.json() + usage = response_json.get("usage", {}) + + if usage: + usage_str = ", ".join(f"{key} = {value}" for key, value in usage.items()) + logger.info("LLM call token usage: %s", usage_str) + + message_content = response_json["choices"][0]["message"]["content"] + + if not structured_output: + logger.debug("Returning raw message content (no structured output requested).") + return message_content + return structure_response(message_content, structured_output) + + except Exception as e: + logger.error("Error during LLM invocation: %s", e) + raise HeuristicAnalyzerValueError(f"Failed to get or validate LLM response: {e}") from e diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 0d43d46bc..5f40e8ac5 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -636,7 +636,11 @@ disabled_custom_rulesets = [llm] # The LLM configuration for Macaron. # If enabled, the LLM will be used to analyze the results and provide insights. -enabled = +enabled = False +# The provider for the LLM service. +# Supported providers : +# - openai: OpenAI's GPT models. +provider = # The API key for the LLM service. api_key = # The API endpoint for the LLM service. diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py index ca9cafbe3..bd5a864da 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py @@ -9,7 +9,7 @@ from pydantic import BaseModel, Field -from macaron.ai import AIClient +from macaron.ai.ai_factory import AIClientFactory from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics @@ -60,7 +60,13 @@ def __init__(self) -> None: heuristic=Heuristics.MATCHING_DOCSTRINGS, depends_on=None, ) - self.client = AIClient(system_prompt=self.SYSTEM_PROMPT.strip()) + factory = AIClientFactory() + client = None + + if factory.defaults["enabled"]: + client = factory.create_client(self.SYSTEM_PROMPT.strip()) + + self.client = client def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the package. @@ -75,8 +81,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes tuple[HeuristicResult, dict[str, JsonType]]: The result and related information collected during the analysis. 
""" - if not self.client.enabled: - logger.warning("AI client is not enabled, skipping the matching docstrings analysis.") + if not self.client: return HeuristicResult.SKIP, {} download_result = pypi_package_json.download_sourcecode() diff --git a/tests/malware_analyzer/pypi/test_matching_docstrings.py b/tests/malware_analyzer/pypi/test_matching_docstrings.py index c427fa6f9..f051bf76c 100644 --- a/tests/malware_analyzer/pypi/test_matching_docstrings.py +++ b/tests/malware_analyzer/pypi/test_matching_docstrings.py @@ -22,7 +22,7 @@ def skip_if_client_disabled(analyzer: MatchingDocstringsAnalyzer) -> None: """ Automatically skip tests in this file if the AI client is disabled. """ - if not analyzer.client.enabled: + if not analyzer.client: pytest.skip("AI client disabled - skipping test") @@ -63,14 +63,6 @@ def test_analyze_inconsistent_docstrings_fail( assert info["inconsistent part"] == "print('hello')" -def test_analyze_ai_client_disabled_skip(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: - """Test the analyzer skips when the AI client is disabled.""" - with patch.object(analyzer.client, "enabled", False): - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.SKIP - assert not info - - def test_analyze_no_source_code_skip(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: """Test the analyzer skips if the source code cannot be downloaded.""" pypi_package_json.download_sourcecode.return_value = False From 6da5458de8e973b0de2eec0a70049b92dafbd388 Mon Sep 17 00:00:00 2001 From: Amine Date: Fri, 15 Aug 2025 02:19:11 +0100 Subject: [PATCH 3/5] feat: add Inconsistent Description heuristic Signed-off-by: Amine --- src/macaron/ai/README.md | 14 +-- src/macaron/ai/ai_tools.py | 22 +--- src/macaron/ai/clients/__init__.py | 9 ++ src/macaron/ai/{ => clients}/ai_factory.py | 34 +++--- .../ai/{ai_client.py => clients/base.py} | 24 ++-- src/macaron/ai/{ => clients}/openai_client.py | 33 ++---- src/macaron/ai/prompts/__init__.py | 2 + src/macaron/ai/schemas/__init__.py | 2 + src/macaron/config/defaults.ini | 3 - .../pypi_heuristics/heuristics.py | 3 + .../metadata/inconsistent_description.py | 107 ++++++++++++++++++ .../sourcecode/matching_docstrings.py | 66 ++++++----- .../checks/detect_malicious_metadata_check.py | 3 + .../pypi/test_inconsistent_description.py | 75 ++++++++++++ .../pypi/test_matching_docstrings.py | 18 +-- 15 files changed, 292 insertions(+), 123 deletions(-) create mode 100644 src/macaron/ai/clients/__init__.py rename src/macaron/ai/{ => clients}/ai_factory.py (58%) rename src/macaron/ai/{ai_client.py => clients/base.py} (62%) rename src/macaron/ai/{ => clients}/openai_client.py (69%) create mode 100644 src/macaron/ai/prompts/__init__.py create mode 100644 src/macaron/ai/schemas/__init__.py create mode 100644 src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py create mode 100644 tests/malware_analyzer/pypi/test_inconsistent_description.py diff --git a/src/macaron/ai/README.md b/src/macaron/ai/README.md index 28ddf4757..9fc5b0b30 100644 --- a/src/macaron/ai/README.md +++ b/src/macaron/ai/README.md @@ -5,13 +5,13 @@ This module provides the foundation for interacting with Large Language Models ( ## Module Components - **ai_client.py** - Defines the abstract [`AIClient`](./ai_client.py) class. This class handles the initialization of LLM configuration from the defaults and serves as the base for all specific AI client implementations. 
Defines the abstract [`AIClient`](./clients/base.py) class. This class handles the initialization of LLM configuration from the defaults and serves as the base for all specific AI client implementations.

- **openai_client.py**
  Implements the [`OpenAiClient`](./clients/openai_client.py) class, a concrete subclass of [`AIClient`](./clients/base.py). This client interacts with OpenAI-like APIs by sending requests over HTTP and processing the responses. It also validates and structures responses using the tools provided.

- **ai_factory.py**
  Contains the [`AIClientFactory`](./clients/ai_factory.py) class, which is responsible for reading provider configuration from the defaults and creating the correct AI client instance.

- **ai_tools.py**
  Offers utility functions such as `extract_json` to assist with parsing the JSON response returned by an LLM. These helpers recover a JSON object even when the model wraps it in surrounding text.
@@ -22,11 +22,11 @@ This module provides the foundation for interacting with Large Language Models (
   The module reads the LLM configuration from the application defaults (using the `defaults` module). Make sure that the `llm` section in your configuration includes valid settings such as `enabled`, `provider`, `api_key`, `api_endpoint`, `model`, and `context_window`.

2. **Creating a Client:**
   Use the [`AIClientFactory`](./clients/ai_factory.py) to create an AI client instance. The factory checks the configured provider and returns a client (e.g., an instance of [`OpenAiClient`](./clients/openai_client.py)) that can be used to invoke the LLM. Note that `create_client` returns `None` if the LLM integration is disabled or the provider is not supported.

   Example:
   ```py
   from macaron.ai.clients.ai_factory import AIClientFactory

   factory = AIClientFactory()
   client = factory.create_client(system_prompt="You are a helpful assistant.")
   if client:
       response = client.invoke("Hello, how can you assist me?")
       print(response)
   ```

3. **Response Processing:**
   When a structured response is required, pass a Pydantic model class to the `invoke` method. The [`ai_tools.py`](./ai_tools.py) module takes care of parsing and validating the response to ensure it meets the expected structure.

## Logging and Error Handling

- The module uses Python's logging framework to report important events, such as token usage and warnings when prompts exceed the allowed context window.
- Configuration errors (e.g., missing API key or endpoint) are handled by raising descriptive exceptions such as [`ConfigurationError`](../errors.py).

## Extensibility

The design of the AI module is provider-agnostic. To add support for additional LLM providers:
- Implement a new client by subclassing [`AIClient`](./clients/base.py).
- Add the new client to the [`PROVIDER_MAPPING`](./clients/__init__.py).
- Update the configuration defaults accordingly. A minimal sketch of a new provider client is shown below.
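
For example, a minimal sketch of a new provider client could look like the following (the provider name and class are illustrative, not an existing Macaron API):

```py
from macaron.ai.clients.base import AIClient


class ExampleProviderClient(AIClient):
    """Sketch of a client for a hypothetical provider."""

    def invoke(
        self,
        user_prompt: str,
        temperature: float = 0.2,
        response_format: dict | None = None,
    ) -> dict:
        # Build the provider-specific HTTP request from self.params (endpoint, key, model),
        # send the prompt, and return the parsed JSON response.
        raise NotImplementedError
```

The client would then be registered with an entry such as `PROVIDER_MAPPING["example"] = ExampleProviderClient` and selected by setting `provider = example` in the `llm` section of the configuration.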
diff --git a/src/macaron/ai/ai_tools.py b/src/macaron/ai/ai_tools.py index e476376f9..d5704a80c 100644 --- a/src/macaron/ai/ai_tools.py +++ b/src/macaron/ai/ai_tools.py @@ -5,18 +5,14 @@ import json import logging import re -from typing import TypeVar - -from pydantic import BaseModel, ValidationError - -T = TypeVar("T", bound=BaseModel) +from typing import Any logger: logging.Logger = logging.getLogger(__name__) -def structure_response(response_text: str, response_model: type[T]) -> T | None: +def extract_json(response_text: str) -> Any: """ - Structure and parse the response from the LLM. + Parse the response from the LLM. If raw JSON parsing fails, attempts to extract a JSON object from text. @@ -24,13 +20,11 @@ def structure_response(response_text: str, response_model: type[T]) -> T | None: ---------- response_text: str The response text from the LLM. - response_model: Type[T] - The Pydantic model to structure the response against. Returns ------- - T | None - The structured Pydantic model instance. + dict[str, Any] | None + The structured JSON object. """ try: data = json.loads(response_text) @@ -46,8 +40,4 @@ def structure_response(response_text: str, response_model: type[T]) -> T | None: logger.debug("Failed to parse extracted JSON: %s", e) return None - try: - return response_model.model_validate(data) - except ValidationError as e: - logger.debug("Validation failed against response model: %s", e) - return None + return data diff --git a/src/macaron/ai/clients/__init__.py b/src/macaron/ai/clients/__init__.py new file mode 100644 index 000000000..7450cef22 --- /dev/null +++ b/src/macaron/ai/clients/__init__.py @@ -0,0 +1,9 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides a mapping of AI client providers to their respective client classes.""" + +from macaron.ai.clients.base import AIClient +from macaron.ai.clients.openai_client import OpenAiClient + +PROVIDER_MAPPING: dict[str, type[AIClient]] = {"openai": OpenAiClient} diff --git a/src/macaron/ai/ai_factory.py b/src/macaron/ai/clients/ai_factory.py similarity index 58% rename from src/macaron/ai/ai_factory.py rename to src/macaron/ai/clients/ai_factory.py index 9462ebf86..5df841efe 100644 --- a/src/macaron/ai/ai_factory.py +++ b/src/macaron/ai/clients/ai_factory.py @@ -5,8 +5,8 @@ import logging -from macaron.ai.ai_client import AIClient -from macaron.ai.openai_client import OpenAiClient +from macaron.ai.clients import PROVIDER_MAPPING +from macaron.ai.clients.base import AIClient from macaron.config.defaults import defaults from macaron.errors import ConfigurationError @@ -16,17 +16,15 @@ class AIClientFactory: """Factory to create AI clients based on provider configuration.""" - PROVIDER_MAPPING: dict[str, type[AIClient]] = {"openai": OpenAiClient} - def __init__(self) -> None: """ Initialize the AI client. The LLM configuration is read from defaults. 
""" - self.defaults = self._load_defaults() + self.params = self._load_defaults() - def _load_defaults(self) -> dict: + def _load_defaults(self) -> dict | None: section_name = "llm" default_values = { "enabled": False, @@ -34,19 +32,14 @@ def _load_defaults(self) -> dict: "api_key": "", "api_endpoint": "", "model": "", - "context_window": 10000, } if defaults.has_section(section_name): section = defaults[section_name] default_values["enabled"] = section.getboolean("enabled", default_values["enabled"]) - default_values["api_key"] = str(section.get("api_key", default_values["api_key"])).strip().lower() - default_values["api_endpoint"] = ( - str(section.get("api_endpoint", default_values["api_endpoint"])).strip().lower() - ) - default_values["model"] = str(section.get("model", default_values["model"])).strip().lower() - default_values["provider"] = str(section.get("provider", default_values["provider"])).strip().lower() - default_values["context_window"] = section.getint("context_window", 10000) + for key, default_value in default_values.items(): + if isinstance(default_value, str): + default_values[key] = str(section.get(key, default_value)).strip().lower() if default_values["enabled"]: for key, value in default_values.items(): @@ -59,12 +52,11 @@ def _load_defaults(self) -> dict: def create_client(self, system_prompt: str) -> AIClient | None: """Create an AI client based on the configured provider.""" - client_class = self.PROVIDER_MAPPING.get(self.defaults["provider"]) - if client_class is None: - logger.error("Provider '%s' is not supported.", self.defaults["provider"]) + if not self.params or not self.params["enabled"]: return None - return client_class(system_prompt, self.defaults) - def list_available_providers(self) -> list[str]: - """List all registered providers.""" - return list(self.PROVIDER_MAPPING.keys()) + client_class = PROVIDER_MAPPING.get(self.params["provider"]) + if client_class is None: + logger.error("Provider '%s' is not supported.", self.params["provider"]) + return None + return client_class(system_prompt, self.params) diff --git a/src/macaron/ai/ai_client.py b/src/macaron/ai/clients/base.py similarity index 62% rename from src/macaron/ai/ai_client.py rename to src/macaron/ai/clients/base.py index 35733e5d8..5177ae8aa 100644 --- a/src/macaron/ai/ai_client.py +++ b/src/macaron/ai/clients/base.py @@ -3,36 +3,28 @@ """This module defines the abstract AIClient class for implementing AI clients.""" -import logging from abc import ABC, abstractmethod -from typing import Any, TypeVar - -from pydantic import BaseModel - -T = TypeVar("T", bound=BaseModel) - -logger: logging.Logger = logging.getLogger(__name__) class AIClient(ABC): """This abstract class is used to implement ai clients.""" - def __init__(self, system_prompt: str, defaults: dict) -> None: + def __init__(self, system_prompt: str, params: dict) -> None: """ Initialize the AI client. The LLM configuration is read from defaults. """ self.system_prompt = system_prompt - self.defaults = defaults + self.params = params @abstractmethod def invoke( self, user_prompt: str, temperature: float = 0.2, - structured_output: type[T] | None = None, - ) -> Any: + response_format: dict | None = None, + ) -> dict: """ Invoke the LLM and optionally validate its response. @@ -42,12 +34,12 @@ def invoke( The user prompt to send to the LLM. temperature: float The temperature for the LLM response. - structured_output: Optional[Type[T]] - The Pydantic model to validate the response against. 
If provided, the response will be parsed and validated. + response_format: dict | None + The json schema to validate the response against. Returns ------- - Optional[T | str] - The validated Pydantic model instance if `structured_output` is provided, + dict + The validated schema if `response_format` is provided, or the raw string response if not. """ diff --git a/src/macaron/ai/openai_client.py b/src/macaron/ai/clients/openai_client.py similarity index 69% rename from src/macaron/ai/openai_client.py rename to src/macaron/ai/clients/openai_client.py index cd856745c..c788cab45 100644 --- a/src/macaron/ai/openai_client.py +++ b/src/macaron/ai/clients/openai_client.py @@ -8,8 +8,8 @@ from pydantic import BaseModel -from macaron.ai.ai_client import AIClient -from macaron.ai.ai_tools import structure_response +from macaron.ai.ai_tools import extract_json +from macaron.ai.clients.base import AIClient from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError from macaron.util import send_post_http_raw @@ -25,7 +25,7 @@ def invoke( self, user_prompt: str, temperature: float = 0.2, - structured_output: type[T] | None = None, + response_format: dict | None = None, max_tokens: int = 4000, timeout: int = 30, ) -> Any: @@ -38,8 +38,8 @@ def invoke( The user prompt to send to the LLM. temperature: float The temperature for the LLM response. - structured_output: Optional[Type[T]] - The Pydantic model to validate the response against. If provided, the response will be parsed and validated. + response_format: dict + The json schema to validate the response against. If provided, the response will be parsed and validated. max_tokens: int The maximum number of tokens for the LLM response. timeout: int @@ -56,28 +56,21 @@ def invoke( HeuristicAnalyzerValueError If there is an error in parsing or validating the response. """ - if not self.defaults["enabled"]: + if not self.params["enabled"]: raise ConfigurationError("AI client is not enabled. Please check your configuration.") - if len(user_prompt.split()) > self.defaults["context_window"]: - logger.warning( - "User prompt exceeds context window (%s words). 
" - "Truncating the prompt to fit within the context window.", - self.defaults["context_window"], - ) - user_prompt = " ".join(user_prompt.split()[: self.defaults["context_window"]]) - - headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.defaults["api_key"]}"} + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.params['api_key']}"} payload = { - "model": self.defaults["model"], + "model": self.params["model"], "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}], + "response_format": response_format, "temperature": temperature, "max_tokens": max_tokens, } try: response = send_post_http_raw( - url=self.defaults["api_endpoint"], json_data=payload, headers=headers, timeout=timeout + url=self.params["api_endpoint"], json_data=payload, headers=headers, timeout=timeout ) if not response: raise HeuristicAnalyzerValueError("No response received from the LLM.") @@ -89,11 +82,7 @@ def invoke( logger.info("LLM call token usage: %s", usage_str) message_content = response_json["choices"][0]["message"]["content"] - - if not structured_output: - logger.debug("Returning raw message content (no structured output requested).") - return message_content - return structure_response(message_content, structured_output) + return extract_json(message_content) except Exception as e: logger.error("Error during LLM invocation: %s", e) diff --git a/src/macaron/ai/prompts/__init__.py b/src/macaron/ai/prompts/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/ai/prompts/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/ai/schemas/__init__.py b/src/macaron/ai/schemas/__init__.py new file mode 100644 index 000000000..8e17a3508 --- /dev/null +++ b/src/macaron/ai/schemas/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 5f40e8ac5..980e18730 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -647,6 +647,3 @@ api_key = api_endpoint = # The model to use for the LLM service. model = -# The context window size for the LLM service. -# This is the maximum number of tokens that the LLM can process in a single request. -context_window = 10000 diff --git a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py index 0286cda2c..fcd88f175 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/heuristics.py @@ -52,6 +52,9 @@ class Heuristics(str, Enum): #: Indicates that the package contains some code that doesn't match the docstrings. MATCHING_DOCSTRINGS = "matching_docstrings" + #: Indicates that the package description is inconsistent. 
+    INCONSISTENT_DESCRIPTION = "inconsistent_description"
+
 
 class HeuristicResult(str, Enum):
     """Result type indicating the outcome of a heuristic."""
diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py
new file mode 100644
index 000000000..31fd7ac38
--- /dev/null
+++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This analyzer checks whether a PyPI package has an inconsistent description."""
+
+import logging
+
+from macaron.ai.clients.ai_factory import AIClientFactory
+from macaron.errors import HeuristicAnalyzerValueError
+from macaron.json_tools import JsonType, json_extract
+from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
+from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
+from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+class InconsistentDescriptionAnalyzer(BaseHeuristicAnalyzer):
+    """Check whether the package's description is inconsistent."""
+
+    SYSTEM_PROMPT = """
+    You are a security expert analyzing a PyPI package. Determine whether the package description is consistent.
+    You must score it between 0 and 100 based on the following criteria:
+    - High-level description summary
+    - Benefit
+    - How to install
+    - How to use
+    - Are the "How to use", "High-level description summary", and "Benefit" sections consistent?
+
+    Wrap the output in `json` tags.
+    Your response must be a JSON object matching this schema:
+    {
+        "score": 0-100,
+        "reason": "A short explanation."
+    }
+    """
+
+    THRESHOLD = 60
+
+    RESPONSE_FORMAT = {
+        "type": "json_schema",
+        "json_schema": {
+            "name": "result_schema",
+            "strict": True,
+            "schema": {
+                "type": "object",
+                "properties": {
+                    "score": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "maximum": 100,
+                        "description": "The final score from 0 to 100 after analysing the package's description.",
+                    },
+                    "reason": {
+                        "type": "string",
+                        "description": "The reason for the overall score. It should be a short sentence explaining the decision.",
+                    },
+                },
+                "required": ["score", "reason"],
+            },
+        },
+    }
+
+    def __init__(self) -> None:
+        super().__init__(
+            name="inconsistent_description_analyzer", heuristic=Heuristics.INCONSISTENT_DESCRIPTION, depends_on=None
+        )
+        factory = AIClientFactory()
+        self.client = factory.create_client(self.SYSTEM_PROMPT.strip())
+
+    def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
+        """Analyze the package.
+
+        Parameters
+        ----------
+        pypi_package_json: PyPIPackageJsonAsset
+            The PyPI package JSON asset object.
+
+        Returns
+        -------
+        tuple[HeuristicResult, dict[str, JsonType]]:
+            The result and related information collected during the analysis.
+ """ + if not self.client: + return HeuristicResult.SKIP, {} + + package_json = pypi_package_json.package_json + info = package_json.get("info", {}) + if not info: + error_msg = "No package info found in metadata" + logger.debug(error_msg) + raise HeuristicAnalyzerValueError(error_msg) + + description = json_extract(package_json, ["info", "description"], str) + if not description or not description.strip(): + return HeuristicResult.FAIL, {"message": "No description found."} + + analysis_result = self.client.invoke( + user_prompt=description, + response_format=self.RESPONSE_FORMAT, + ) + + if analysis_result["score"] < self.THRESHOLD: + return HeuristicResult.FAIL, { + "message": f"inconsistent description with score {analysis_result['score']}. because {analysis_result['reason']}" + } + return HeuristicResult.PASS, {"message": f"consistent description with a {analysis_result['score']} score."} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py index bd5a864da..a1e7fd1c0 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py @@ -5,11 +5,8 @@ import logging import time -from typing import Literal -from pydantic import BaseModel, Field - -from macaron.ai.ai_factory import AIClientFactory +from macaron.ai.clients.ai_factory import AIClientFactory from macaron.json_tools import JsonType from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics @@ -18,23 +15,6 @@ logger: logging.Logger = logging.getLogger(__name__) -class Result(BaseModel): - """The result after analysing the code with its docstrings.""" - - decision: Literal["consistent", "inconsistent"] = Field( - description=""" The final decision after analysing the code with its docstrings. - It can be either 'consistent' or 'inconsistent'.""" - ) - reason: str = Field( - description=" The reason for the decision made. It should be a short sentence explaining the decision." - ) - inconsistent_code_part: str | None = Field( - default=None, - description=""" The specific part of the code that is inconsistent with the docstring. - Empty if the decision is 'consistent'.""", - ) - - class MatchingDocstringsAnalyzer(BaseHeuristicAnalyzer): """Check whether the docstrings and the code components are consistent.""" @@ -54,6 +34,35 @@ class MatchingDocstringsAnalyzer(BaseHeuristicAnalyzer): REQUEST_INTERVAL = 0.5 + RESPONSE_FORMAT = { + "type": "json_schema", + "json_schema": { + "name": "result_schema", + "strict": True, + "schema": { + "type": "object", + "properties": { + "decision": { + "type": "string", + "enum": ["consistent", "inconsistent"], + "description": """The final decision after analysing the code with its docstrings. + It can be either 'consistent' or 'inconsistent'.""", + }, + "reason": { + "type": "string", + "description": "The reason for the decision made.", + }, + "inconsistent_code_part": { + "type": ["string", "null"], + "description": """The specific part of the code that is inconsistent with the docstring. 
+                            Empty if the decision is 'consistent'.""",
+                    },
+                },
+                "required": ["decision", "reason", "inconsistent_code_part"],
+            },
+        },
+    }
+
     def __init__(self) -> None:
         super().__init__(
             name="matching_docstrings_analyzer",
@@ -61,12 +70,7 @@ def __init__(self) -> None:
             depends_on=None,
         )
         factory = AIClientFactory()
-        client = None
-
-        if factory.defaults["enabled"]:
-            client = factory.create_client(self.SYSTEM_PROMPT.strip())
-
-        self.client = client
+        self.client = factory.create_client(self.SYSTEM_PROMPT.strip())
 
     def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]:
         """Analyze the package.
@@ -95,12 +99,12 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
                 code_str = content.decode("utf-8", "ignore")
                 analysis_result = self.client.invoke(
                     user_prompt=code_str,
-                    structured_output=Result,
+                    response_format=self.RESPONSE_FORMAT,
                 )
-                if analysis_result and analysis_result.decision == "inconsistent":
+                if analysis_result["decision"] == "inconsistent":
                     return HeuristicResult.FAIL, {
                         "file": file,
-                        "reason": analysis_result.reason,
-                        "inconsistent part": analysis_result.inconsistent_code_part or "",
+                        "reason": analysis_result["reason"],
+                        "inconsistent part": analysis_result["inconsistent_code_part"] or "",
                     }
         return HeuristicResult.PASS, {}
diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
index a09289713..304f3c9e7 100644
--- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
+++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
@@ -22,6 +22,7 @@
 from macaron.malware_analyzer.pypi_heuristics.metadata.empty_project_link import EmptyProjectLinkAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.high_release_frequency import HighReleaseFrequencyAnalyzer
+from macaron.malware_analyzer.pypi_heuristics.metadata.inconsistent_description import InconsistentDescriptionAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.similar_projects import SimilarProjectAnalyzer
 from macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo import SourceCodeRepoAnalyzer
@@ -368,6 +369,8 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
             FakeEmailAnalyzer,
             SimilarProjectAnalyzer,
             MatchingDocstringsAnalyzer,
+            # This heuristic is not yet used in any rule combination below; more testing is needed before doing that.
+            InconsistentDescriptionAnalyzer,
         ]
 
         # name used to query the result of all problog rules, so it can be accessed outside the model.
diff --git a/tests/malware_analyzer/pypi/test_inconsistent_description.py b/tests/malware_analyzer/pypi/test_inconsistent_description.py
new file mode 100644
index 000000000..69a557c41
--- /dev/null
+++ b/tests/malware_analyzer/pypi/test_inconsistent_description.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+ +"""Tests for the InconsistentDescriptionAnalyzer heuristic.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from macaron.errors import HeuristicAnalyzerValueError +from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult +from macaron.malware_analyzer.pypi_heuristics.metadata.inconsistent_description import InconsistentDescriptionAnalyzer + + +@pytest.fixture(name="analyzer") +def analyzer_() -> InconsistentDescriptionAnalyzer: + """Pytest fixture to create an InconsistentDescriptionAnalyzer instance.""" + return InconsistentDescriptionAnalyzer() + + +@pytest.fixture(autouse=True) +def skip_if_client_disabled(analyzer: InconsistentDescriptionAnalyzer) -> None: + """ + Automatically skip tests in this file if the AI client is disabled. + """ + if not analyzer.client: + pytest.skip("AI client disabled - skipping test") + + +def test_analyze_consistent_description_pass( + analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer passes when the description is consistent.""" + pypi_package_json.package_json = {"info": {"description": "This is a test package."}} + mock_result = {"score": 80, "reason": "The description is consistent."} + + with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert isinstance(info["message"], str) + assert "consistent description with a 80 score" in info["message"] + mock_invoke.assert_called_once() + + +def test_analyze_inconsistent_description_fail( + analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer fails when the description is inconsistent.""" + pypi_package_json.package_json = {"info": {"description": "This is a misleading package."}} + mock_result = {"score": 30, "reason": "The description is misleading."} + + with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL + assert isinstance(info["message"], str) + assert "inconsistent description with score 30" in info["message"] + assert "because The description is misleading" in info["message"] + mock_invoke.assert_called_once() + + +def test_analyze_no_description_fail(analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock) -> None: + """Test the analyzer fails if there is no description.""" + pypi_package_json.package_json = {"info": {"description": " "}} + with patch.object(analyzer.client, "invoke") as mock_invoke: + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL + assert info["message"] == "No description found." 
+        mock_invoke.assert_not_called()
+
+
+def test_analyze_no_info_raises_error(analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock) -> None:
+    """Test the analyzer raises an error if the package JSON has no 'info' field."""
+    pypi_package_json.package_json = {}
+    with pytest.raises(HeuristicAnalyzerValueError):
+        analyzer.analyze(pypi_package_json)

From d0a0d65939cc34351c70c780202815b2b44c2efc Mon Sep 17 00:00:00 2001
From: Amine
Date: Mon, 25 Aug 2025 18:13:23 +0100
Subject: [PATCH 4/5] refactor: move threshold configuration to defaults.ini

Signed-off-by: Amine
---
 src/macaron/config/defaults.ini                |  3 +++
 .../metadata/inconsistent_description.py       | 14 +++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini
index 980e18730..f4ae2cd2a 100644
--- a/src/macaron/config/defaults.ini
+++ b/src/macaron/config/defaults.ini
@@ -609,6 +609,9 @@ popular_packages_path =
 # A boolean value that determines whether to check the deliverability of the email address.
 check_deliverability = True
 
+# The minimum score for a package description to be considered consistent.
+score_threshold = 70
+
 # ==== The following sections are for source code analysis using Semgrep ====
 # rulesets: a reference to a 'ruleset' in this section refers to a Semgrep .yaml file containing one or more rules.
# rules: a reference to a 'rule' in this section refers to an individual rule ID, specified by the '- id:' field in diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py index 31fd7ac38..8aa5a61e9 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py @@ -6,6 +6,7 @@ import logging from macaron.ai.clients.ai_factory import AIClientFactory +from macaron.config.defaults import defaults from macaron.errors import HeuristicAnalyzerValueError from macaron.json_tools import JsonType, json_extract from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer @@ -35,8 +36,6 @@ class InconsistentDescriptionAnalyzer(BaseHeuristicAnalyzer): } """ - THRESHOLD = 60 - RESPONSE_FORMAT = { "type": "json_schema", "json_schema": { @@ -65,9 +64,18 @@ def __init__(self) -> None: super().__init__( name="inconsistent_description_analyzer", heuristic=Heuristics.INCONSISTENT_DESCRIPTION, depends_on=None ) + self.threshold = self._load_defaults() factory = AIClientFactory() self.client = factory.create_client(self.SYSTEM_PROMPT.strip()) + def _load_defaults(self) -> int: + """Load the default values from defaults.ini.""" + section_name = "heuristic.pypi" + if defaults.has_section(section_name): + section = defaults[section_name] + return section.getint("score_threshold", 70) + return 70 + def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicResult, dict[str, JsonType]]: """Analyze the package. @@ -100,7 +108,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes response_format=self.RESPONSE_FORMAT, ) - if analysis_result["score"] < self.THRESHOLD: + if analysis_result["score"] < self.threshold: return HeuristicResult.FAIL, { "message": f"inconsistent description with score {analysis_result['score']}. 
because {analysis_result['reason']}" } From 2df3401eda56ea241f91acc222d363b7116a217f Mon Sep 17 00:00:00 2001 From: Amine Date: Sat, 6 Sep 2025 02:19:19 +0100 Subject: [PATCH 5/5] chore(tests): improve test coverage and apply minor heuristic changes Signed-off-by: Amine --- src/macaron/ai/clients/openai_client.py | 2 + .../metadata/inconsistent_description.py | 3 + .../sourcecode/matching_docstrings.py | 8 ++ .../slsa_analyzer/build_tool/gradle.py | 89 ------------ src/macaron/slsa_analyzer/build_tool/maven.py | 68 --------- src/macaron/slsa_analyzer/build_tool/pip.py | 5 - .../slsa_analyzer/build_tool/poetry.py | 5 - .../checks/detect_malicious_metadata_check.py | 4 +- .../pypi/test_inconsistent_description.py | 134 +++++++++++++++--- .../pypi/test_matching_docstrings.py | 70 ++++----- 10 files changed, 163 insertions(+), 225 deletions(-) diff --git a/src/macaron/ai/clients/openai_client.py b/src/macaron/ai/clients/openai_client.py index c788cab45..772cc61d8 100644 --- a/src/macaron/ai/clients/openai_client.py +++ b/src/macaron/ai/clients/openai_client.py @@ -27,6 +27,7 @@ def invoke( temperature: float = 0.2, response_format: dict | None = None, max_tokens: int = 4000, + seed: int = 42, timeout: int = 30, ) -> Any: """ @@ -65,6 +66,7 @@ def invoke( "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}], "response_format": response_format, "temperature": temperature, + "seed": seed, "max_tokens": max_tokens, } diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py index 8aa5a61e9..5ca7d8ce1 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/inconsistent_description.py @@ -107,6 +107,9 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes user_prompt=description, response_format=self.RESPONSE_FORMAT, ) + if not analysis_result: + logger.error("LLM returned invalid response, skipping the analysis.") + return HeuristicResult.SKIP, {} if analysis_result["score"] < self.threshold: return HeuristicResult.FAIL, { diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py index a1e7fd1c0..51cf6eb8d 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/matching_docstrings.py @@ -93,6 +93,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes logger.warning("No source code found for the package, skipping the matching docstrings analysis.") return HeuristicResult.SKIP, {} + none_attempts = 5 for file, content in pypi_package_json.iter_sourcecode(): if file.endswith(".py"): time.sleep(self.REQUEST_INTERVAL) # Respect the request interval to avoid rate limiting. 
@@ -101,6 +102,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes user_prompt=code_str, response_format=self.RESPONSE_FORMAT, ) + if not analysis_result: + none_attempts -= 1 + if none_attempts == 0: + logger.error("LLM returned None multiple times, skipping the analysis.") + return HeuristicResult.SKIP, {} + continue + if analysis_result["decision"] == "inconsistent": return HeuristicResult.FAIL, { "file": file, diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index c67f428cd..bd316dd30 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -1,5 +1,4 @@ # Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Gradle class which inherits BaseBuildTool. @@ -70,94 +69,6 @@ def is_detected(self, repo_path: str) -> bool: gradle_config_files = self.build_configs + self.entry_conf return any(file_exists(repo_path, file) for file in gradle_config_files) - def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: - """Prepare the necessary wrapper files for running the build. - - This method will return False if there is any errors happened during operation. - - Parameters - ---------- - wrapper_path : str - The path where all necessary wrapper files are located. - build_dir : str - The path of the build dir. This is where all files are copied to. - - Returns - ------- - bool - True if succeed else False. - """ - # The path of the needed wrapper files - wrapper_files = self.wrapper_files - - if copy_file_bulk(wrapper_files, wrapper_path, build_dir): - # Ensure that gradlew is executable. - file_path = os.path.join(build_dir, "gradlew") - status = os.stat(file_path) - if oct(status.st_mode)[-3:] != "744": - logger.debug("%s does not have 744 permission. Changing it to 744.") - os.chmod(file_path, 0o744) - return True - - return False - - def get_dep_analyzer(self) -> CycloneDxGradle: - """Create a DependencyAnalyzer for the Gradle build tool. - - Returns - ------- - CycloneDxGradle - The CycloneDxGradle object. - - Raises - ------ - DependencyAnalyzerError - """ - if "dependency.resolver" not in defaults or "dep_tool_gradle" not in defaults["dependency.resolver"]: - raise DependencyAnalyzerError("No default dependency analyzer is found.") - if not DependencyAnalyzer.tool_valid(defaults.get("dependency.resolver", "dep_tool_gradle")): - raise DependencyAnalyzerError( - f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.", - ) - - tool_name, tool_version = tuple( - defaults.get( - "dependency.resolver", - "dep_tool_gradle", - fallback="cyclonedx-gradle:1.7.3", - ).split(":") - ) - if tool_name == DependencyTools.CYCLONEDX_GRADLE: - return CycloneDxGradle( - resources_path=global_config.resources_path, - file_name="bom.json", - tool_name=tool_name, - tool_version=tool_version, - ) - - raise DependencyAnalyzerError(f"Unsupported SBOM generator for Gradle: {tool_name}.") - - def get_gradle_exec(self, repo_path: str) -> str: - """Get the Gradle executable for the repo. - - Parameters - ---------- - repo_path: str - The absolute path to a repository containing Gradle projects. - - Returns - ------- - str - The absolute path to the Gradle executable. 
- """ - # We try to use the gradlew that comes with the repository first. - repo_gradlew = os.path.join(repo_path, "gradlew") - if os.path.isfile(repo_gradlew) and os.access(repo_gradlew, os.X_OK): - return repo_gradlew - - # We use Macaron's built-in gradlew as a fallback option. - return os.path.join(os.path.join(macaron.MACARON_PATH, "resources"), "gradlew") - def get_group_id(self, gradle_exec: str, project_path: str) -> str | None: """Get the group id of a Gradle project. diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index 922fb7b71..0e89849af 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -64,71 +64,3 @@ def is_detected(self, repo_path: str) -> bool: return False maven_config_files = self.build_configs return any(file_exists(repo_path, file) for file in maven_config_files) - - def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: - """Prepare the necessary wrapper files for running the build. - - This method will return False if there is any errors happened during operation. - - Parameters - ---------- - wrapper_path : str - The path where all necessary wrapper files are located. - build_dir : str - The path of the build dir. This is where all files are copied to. - - Returns - ------- - bool - True if succeed else False. - """ - # The path of the needed wrapper files - wrapper_files = self.wrapper_files - - if copy_file_bulk(wrapper_files, wrapper_path, build_dir): - # Ensure that mvnw is executable. - file_path = os.path.join(build_dir, "mvnw") - status = os.stat(file_path) - if oct(status.st_mode)[-3:] != "744": - logger.debug("%s does not have 744 permission. Changing it to 744.") - os.chmod(file_path, 0o744) - return True - - return False - - def get_dep_analyzer(self) -> CycloneDxMaven: - """ - Create a DependencyAnalyzer for the Maven build tool. - - Returns - ------- - CycloneDxMaven - The CycloneDxMaven object. - - Raises - ------ - DependencyAnalyzerError - """ - if "dependency.resolver" not in defaults or "dep_tool_maven" not in defaults["dependency.resolver"]: - raise DependencyAnalyzerError("No default dependency analyzer is found.") - if not DependencyAnalyzer.tool_valid(defaults.get("dependency.resolver", "dep_tool_maven")): - raise DependencyAnalyzerError( - f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_maven')} is not valid.", - ) - - tool_name, tool_version = tuple( - defaults.get( - "dependency.resolver", - "dep_tool_maven", - fallback="cyclonedx-maven:2.6.2", - ).split(":") - ) - if tool_name == DependencyTools.CYCLONEDX_MAVEN: - return CycloneDxMaven( - resources_path=global_config.resources_path, - file_name="bom.json", - tool_name=tool_name, - tool_version=tool_version, - ) - - raise DependencyAnalyzerError(f"Unsupported SBOM generator for Maven: {tool_name}.") diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 073380ec2..1926ca33b 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -64,11 +64,6 @@ def get_dep_analyzer(self) -> DependencyAnalyzer: DependencyAnalyzer The DependencyAnalyzer object. 
""" - tool_name = "cyclonedx_py" - if not DependencyAnalyzer.tool_valid(f"{tool_name}:{cyclonedx_version}"): - raise DependencyAnalyzerError( - f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.", - ) return CycloneDxPython( resources_path=global_config.resources_path, file_name="python_sbom.json", diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index 3e928dfca..a1d5a4a0d 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -102,11 +102,6 @@ def get_dep_analyzer(self) -> DependencyAnalyzer: DependencyAnalyzer The DependencyAnalyzer object. """ - tool_name = "cyclonedx_py" - if not DependencyAnalyzer.tool_valid(f"{tool_name}:{cyclonedx_version}"): - raise DependencyAnalyzerError( - f"Dependency analyzer {defaults.get('dependency.resolver', 'dep_tool_gradle')} is not valid.", - ) return CycloneDxPython( resources_path=global_config.resources_path, file_name="python_sbom.json", diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 304f3c9e7..c3233bce1 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -450,8 +450,9 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: failed({Heuristics.SIMILAR_PROJECTS.value}), failed({Heuristics.HIGH_RELEASE_FREQUENCY.value}), failed({Heuristics.FAKE_EMAIL.value}). + % Package released with a name similar to a popular package. - {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_3) :- + {Confidence.MEDIUM.value}::trigger(malware_medium_confidence_5) :- quickUndetailed, forceSetup, failed({Heuristics.MATCHING_DOCSTRINGS.value}). % ----- Evaluation ----- @@ -461,6 +462,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: {problog_result_access} :- trigger(malware_high_confidence_2). {problog_result_access} :- trigger(malware_high_confidence_3). {problog_result_access} :- trigger(malware_high_confidence_4). + {problog_result_access} :- trigger(malware_medium_confidence_5). {problog_result_access} :- trigger(malware_medium_confidence_4). {problog_result_access} :- trigger(malware_medium_confidence_3). {problog_result_access} :- trigger(malware_medium_confidence_2). 
diff --git a/tests/malware_analyzer/pypi/test_inconsistent_description.py b/tests/malware_analyzer/pypi/test_inconsistent_description.py index 69a557c41..51b96a166 100644 --- a/tests/malware_analyzer/pypi/test_inconsistent_description.py +++ b/tests/malware_analyzer/pypi/test_inconsistent_description.py @@ -27,35 +27,16 @@ def skip_if_client_disabled(analyzer: InconsistentDescriptionAnalyzer) -> None: pytest.skip("AI client disabled - skipping test") -def test_analyze_consistent_description_pass( - analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock -) -> None: - """Test the analyzer passes when the description is consistent.""" - pypi_package_json.package_json = {"info": {"description": "This is a test package."}} - mock_result = {"score": 80, "reason": "The description is consistent."} - - with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.PASS - assert isinstance(info["message"], str) - assert "consistent description with a 80 score" in info["message"] - mock_invoke.assert_called_once() - - def test_analyze_inconsistent_description_fail( analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock ) -> None: """Test the analyzer fails when the description is inconsistent.""" pypi_package_json.package_json = {"info": {"description": "This is a misleading package."}} - mock_result = {"score": 30, "reason": "The description is misleading."} - with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.FAIL - assert isinstance(info["message"], str) - assert "inconsistent description with score 30" in info["message"] - assert "because The description is misleading" in info["message"] - mock_invoke.assert_called_once() + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL + assert isinstance(info["message"], str) + assert info["message"].startswith("inconsistent description with score") def test_analyze_no_description_fail(analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock) -> None: @@ -73,3 +54,110 @@ def test_analyze_no_info_raises_error(analyzer: InconsistentDescriptionAnalyzer, pypi_package_json.package_json = {} with pytest.raises(HeuristicAnalyzerValueError): analyzer.analyze(pypi_package_json) + + +CONSISTENT_DESCRIPTION = """ +# Requests + +**Requests** is a simple, yet elegant, HTTP library. + +Requests allows you to send HTTP/1.1 requests extremely easily. +There’s no need to manually add query strings to your URLs, +or to form-encode your `PUT` & `POST` data — but nowadays, just use the `json` method! + +Requests is one of the most downloaded Python packages today, +pulling in around `30M downloads / week`— according to GitHub, +Requests is currently +[depended upon](https://github.com/psf/requests/network/dependents?package_id=UGFja2FnZS01NzA4OTExNg%3D%3D) +by `1,000,000+` repositories. +You may certainly put your trust in this code. 
+ +[![Downloads](https://static.pepy.tech/badge/requests/month)](https://pepy.tech/project/requests) +[![Supported Versions](https://img.shields.io/pypi/pyversions/requests.svg)](https://pypi.org/project/requests) +[![Contributors](https://img.shields.io/github/contributors/psf/requests.svg)](https://github.com/psf/requests/graphs/contributors) + +## Installing Requests and Supported Versions + +Requests is available on PyPI: + +```console +$ python -m pip install requests +``` + +Requests officially supports Python 3.9+. + +## Supported Features & Best–Practices + +Requests is ready for the demands of building robust and reliable HTTP–speaking applications, for the needs of today. + +- Keep-Alive & Connection Pooling +- International Domains and URLs +- Sessions with Cookie Persistence +- Browser-style TLS/SSL Verification +- Basic & Digest Authentication +- Familiar `dict`–like Cookies +- Automatic Content Decompression and Decoding +- Multi-part File Uploads +- SOCKS Proxy Support +- Connection Timeouts +- Streaming Downloads +- Automatic honoring of `.netrc` +- Chunked HTTP Requests + +## API Reference and User Guide available on [Read the Docs](https://requests.readthedocs.io) + +[![Read the Docs](https://raw.githubusercontent.com/psf/requests/main/ext/ss.png)](https://requests.readthedocs.io) + +## Cloning the repository + +When cloning the Requests repository, you may need to add the `-c +fetch.fsck.badTimezone=ignore` flag to avoid an error about a bad commit timestamp (see +[this issue](https://github.com/psf/requests/issues/2690) for more background): + +```shell +git clone -c fetch.fsck.badTimezone=ignore https://github.com/psf/requests.git +``` + +You can also apply this setting to your global Git config: + +```shell +git config --global fetch.fsck.badTimezone ignore +``` + +--- + +[![Kenneth Reitz](https://raw.githubusercontent.com/psf/requests/main/ext/kr.png)](https://kennethreitz.org) +[![Python Software Foundation](https://raw.githubusercontent.com/psf/requests/main/ext/psf.png)](https://www.python.org/psf) +""" + + +def test_analyze_consistent_description_pass( + analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer passes when the description is consistent.""" + pypi_package_json.package_json = { + "info": { + "description": CONSISTENT_DESCRIPTION, + } + } + + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert isinstance(info["message"], str) + assert info["message"].startswith("consistent description with a ") + + +def test_analyze_excessive_llm_invocation_error_skip( + analyzer: InconsistentDescriptionAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer skips if the LLM invocation returns None multiple times.""" + pypi_package_json.package_json = { + "info": { + "description": "description", + } + } + + with patch.object(analyzer.client, "invoke", return_value=None): + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + assert not info diff --git a/tests/malware_analyzer/pypi/test_matching_docstrings.py b/tests/malware_analyzer/pypi/test_matching_docstrings.py index dbdcddebe..dcad994de 100644 --- a/tests/malware_analyzer/pypi/test_matching_docstrings.py +++ b/tests/malware_analyzer/pypi/test_matching_docstrings.py @@ -31,17 +31,9 @@ def test_analyze_consistent_docstrings_pass(analyzer: MatchingDocstringsAnalyzer pypi_package_json.download_sourcecode.return_value = True 
pypi_package_json.iter_sourcecode.return_value = [("test.py", b"def func():\n '''docstring'''\n pass")] - mock_result = { - "decision": "consistent", - "reason": "The code is consistent with the docstring.", - "inconsistent_code_part": None, - } - - with patch.object(analyzer.client, "invoke", return_value=mock_result) as mock_invoke: - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.PASS - assert not info - mock_invoke.assert_called_once() + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert not info def test_analyze_inconsistent_docstrings_fail( @@ -50,42 +42,39 @@ def test_analyze_inconsistent_docstrings_fail( """Test the analyzer fails when docstrings are inconsistent with the code.""" pypi_package_json.download_sourcecode.return_value = True pypi_package_json.iter_sourcecode.return_value = [ - ("test.py", b"def func():\n '''docstring'''\n print('hello')") + ( + "test.py", + b""" + def factorial(number: int): + '''A function that returns the factorial of a number''' + return number * 2 + print('hello') + """, + ), ] - mock_result = { - "decision": "inconsistent", - "reason": "The docstring does not mention the print statement.", - "inconsistent_code_part": "print('hello')", - } - - with patch.object(analyzer.client, "invoke", return_value=mock_result): - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.FAIL - assert info["file"] == "test.py" - assert info["reason"] == "The docstring does not mention the print statement." - assert info["inconsistent part"] == "print('hello')" + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.FAIL + assert info["file"] == "test.py" def test_analyze_no_source_code_skip(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: """Test the analyzer skips if the source code cannot be downloaded.""" pypi_package_json.download_sourcecode.return_value = False - with patch.object(analyzer.client, "invoke") as mock_invoke: - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.SKIP - assert not info - mock_invoke.assert_not_called() + + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + assert not info def test_analyze_no_python_files_pass(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: """Test the analyzer passes if there are no Python files in the source code.""" pypi_package_json.download_sourcecode.return_value = True pypi_package_json.iter_sourcecode.return_value = [("README.md", b"This is a test package.")] - with patch.object(analyzer.client, "invoke") as mock_invoke: - result, info = analyzer.analyze(pypi_package_json) - assert result == HeuristicResult.PASS - assert not info - mock_invoke.assert_not_called() + + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.PASS + assert not info def test_analyze_llm_invocation_error_pass(analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock) -> None: @@ -97,3 +86,16 @@ def test_analyze_llm_invocation_error_pass(analyzer: MatchingDocstringsAnalyzer, result, info = analyzer.analyze(pypi_package_json) assert result == HeuristicResult.PASS assert not info + + +def test_analyze_excessive_llm_invocation_error_skip( + analyzer: MatchingDocstringsAnalyzer, pypi_package_json: MagicMock +) -> None: + """Test the analyzer skips if the LLM invocation returns None multiple times.""" + 
pypi_package_json.download_sourcecode.return_value = True + pypi_package_json.iter_sourcecode.return_value = [("test.py", b"def func():\n pass") for _ in range(5)] + + with patch.object(analyzer.client, "invoke", return_value=None): + result, info = analyzer.analyze(pypi_package_json) + assert result == HeuristicResult.SKIP + assert not info