diff --git a/TRACING.md b/TRACING.md
new file mode 100644
index 0000000..f61ebed
--- /dev/null
+++ b/TRACING.md
@@ -0,0 +1,54 @@
+# OpenLLMetry Tracing Setup
+
+This application now includes OpenLLMetry tracing to monitor LLM calls and performance in Traceloop.
+
+## Setup
+
+1. **Install dependencies** (if not already installed):
+   ```bash
+   uv sync
+   ```
+
+2. **Set your Traceloop API key** (optional but recommended):
+   ```bash
+   export TRACELOOP_API_KEY="your_api_key_here"
+   ```
+
+   You can get your API key from your [Traceloop dashboard](https://app.traceloop.com/).
+
+3. **Run the application**:
+   ```bash
+   uv run kickoff
+   ```
+
+## What Gets Traced
+
+The application automatically traces:
+
+- **Main prompt optimization flow**: Full workflow execution
+- **Prompt evaluation**: Each evaluation step with scores and feedback
+- **Prompt optimization**: Optimization attempts with before/after prompts
+- **Retry logic**: Retry counts and completion reasons
+
+## Trace Attributes
+
+Each trace includes relevant attributes such as:
+- Prompt content (original and optimized)
+- Evaluation scores
+- Retry counts
+- Failure reasons
+- Optimization results
+
+## Viewing Traces
+
+1. Go to your [Traceloop dashboard](https://app.traceloop.com/)
+2. Navigate to the "Traces" section
+3. View detailed traces of your prompt optimization runs
+
+## Local Development
+
+If you don't set the `TRACELOOP_API_KEY` environment variable, the application still runs normally, but spans are no-ops and nothing is sent to Traceloop. This allows development without requiring an API key.
+
+## Environment Variables
+
+- `TRACELOOP_API_KEY`: Your Traceloop API key (optional for local development)
\ No newline at end of file
diff --git a/src/prompt_optimizer/evaluate_crew/evaluate_crew.py b/src/prompt_optimizer/evaluate_crew/evaluate_crew.py
index b3d85ab..0b2ea2b 100644
--- a/src/prompt_optimizer/evaluate_crew/evaluate_crew.py
+++ b/src/prompt_optimizer/evaluate_crew/evaluate_crew.py
@@ -4,6 +4,7 @@
 from typing import List
 from pydantic import BaseModel, Field
 from prompt_optimizer.tools.run_prompt import RunPrompt
+from prompt_optimizer.tracing import tracer_instance
 
 
 class EvaluationResult(BaseModel):
diff --git a/src/prompt_optimizer/main.py b/src/prompt_optimizer/main.py
index 13f9427..086a9d1 100644
--- a/src/prompt_optimizer/main.py
+++ b/src/prompt_optimizer/main.py
@@ -8,6 +8,7 @@
     EvaluationResult,
 )
 from prompt_optimizer.optimize_crew.optimize_crew import PromptOptimizer
+from prompt_optimizer.tracing import tracer_instance
 
 START_PROMPT = """Answer the following question based on the provided context:
 
@@ -30,49 +31,70 @@ class PromptOptimizationFlow(Flow[PromptOptimizationFlowState]):
 
     @start("retry")
     def evaluate_prompt(self):
-        print("Evaluating prompt")
-        result: EvaluationResult = (
-            PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
-        ).pydantic
-        self.state.score = result.score
-        self.state.valid = not result.failure_reasons
-        self.state.feedback = result.failure_reasons
-
-        print(f"Evaluation results:")
-        print(f"Score: {self.state.score:.2f}")
-        if result.failure_reasons:
-            print("\nFailure reasons:")
-            print(result.failure_reasons)
-
-        self.state.retry_count += 1
-
-        return "optimize"
+        with tracer_instance.start_as_current_span("evaluate_prompt") as span:
+            print("Evaluating prompt")
+            span.set_attribute("prompt", self.state.prompt)
+            span.set_attribute("retry_count", self.state.retry_count)
+
+            result: EvaluationResult = (
+                PromptEvaluator().crew().kickoff(inputs={"prompt": self.state.prompt})
+            ).pydantic
+
+            self.state.score = result.score
+            self.state.valid = not result.failure_reasons
+            self.state.feedback = result.failure_reasons
+
+            span.set_attribute("score", self.state.score)
+            span.set_attribute("valid", self.state.valid)
+            if result.failure_reasons:
+                span.set_attribute("failure_reasons", result.failure_reasons)
+
+            print(f"Evaluation results:")
+            print(f"Score: {self.state.score:.2f}")
+            if result.failure_reasons:
+                print("\nFailure reasons:")
+                print(result.failure_reasons)
+
+            self.state.retry_count += 1
+
+            return "optimize"
 
     @router(evaluate_prompt)
     def optimize_prompt(self):
-        if self.state.score > 0.8:
-            return "complete"
-
-        if self.state.retry_count > 3:
-            return "max_retry_exceeded"
-
-        print("Optimizing prompt")
-        result = (
-            PromptOptimizer()
-            .crew()
-            .kickoff(
-                inputs={
-                    "prompt": self.state.prompt,
-                    "feedback": self.state.feedback,
-                    "score": self.state.score,
-                }
+        with tracer_instance.start_as_current_span("optimize_prompt") as span:
+            span.set_attribute("current_score", self.state.score)
+            span.set_attribute("retry_count", self.state.retry_count)
+
+            if self.state.score > 0.8:
+                span.set_attribute("optimization_result", "complete")
+                return "complete"
+
+            if self.state.retry_count > 3:
+                span.set_attribute("optimization_result", "max_retry_exceeded")
+                return "max_retry_exceeded"
+
+            print("Optimizing prompt")
+            span.set_attribute("original_prompt", self.state.prompt)
+            span.set_attribute("feedback", self.state.feedback or "")
+
+            result = (
+                PromptOptimizer()
+                .crew()
+                .kickoff(
+                    inputs={
+                        "prompt": self.state.prompt,
+                        "feedback": self.state.feedback,
+                        "score": self.state.score,
+                    }
+                )
             )
-        )
 
-        print("Optimized prompt:", result.raw)
-        self.state.prompt = result.raw
+            print("Optimized prompt:", result.raw)
+            span.set_attribute("optimized_prompt", result.raw)
+            span.set_attribute("optimization_result", "retry")
+            self.state.prompt = result.raw
 
-        return "retry"
+            return "retry"
 
     @listen("complete")
     def save_result(self):
@@ -95,8 +117,10 @@ def max_retry_exceeded_exit(self):
 
 
 def kickoff():
-    prompt_flow = PromptOptimizationFlow()
-    prompt_flow.kickoff()
+    with tracer_instance.start_as_current_span("prompt_optimization_flow") as span:
+        span.set_attribute("operation", "kickoff")
+        prompt_flow = PromptOptimizationFlow()
+        prompt_flow.kickoff()
 
 
 def plot():
diff --git a/src/prompt_optimizer/optimize_crew/optimize_crew.py b/src/prompt_optimizer/optimize_crew/optimize_crew.py
index df586a8..d75ab92 100644
--- a/src/prompt_optimizer/optimize_crew/optimize_crew.py
+++ b/src/prompt_optimizer/optimize_crew/optimize_crew.py
@@ -3,6 +3,7 @@
 from crewai.agents.agent_builder.base_agent import BaseAgent
 from crewai.knowledge.source.crew_docling_source import CrewDoclingSource
 from typing import List
+from prompt_optimizer.tracing import tracer_instance
 
 
 @CrewBase
diff --git a/src/prompt_optimizer/tracing.py b/src/prompt_optimizer/tracing.py
new file mode 100644
index 0000000..f3d1581
--- /dev/null
+++ b/src/prompt_optimizer/tracing.py
@@ -0,0 +1,25 @@
+import os
+
+from opentelemetry import trace
+from traceloop.sdk import Traceloop
+
+
+def initialize_tracing():
+    """Initialize OpenLLMetry (Traceloop) tracing for the application."""
+    # Traceloop reads the API key from the TRACELOOP_API_KEY environment variable.
+    api_key = os.getenv("TRACELOOP_API_KEY")
+
+    if api_key:
+        # Export traces to Traceloop; OpenLLMetry is distributed as traceloop-sdk.
+        Traceloop.init(app_name="prompt_optimizer")
+    else:
+        # Local development: the application still runs, but nothing is sent to Traceloop.
+        print("Warning: TRACELOOP_API_KEY not set. Set it to enable tracing to Traceloop.")
+
+    print("OpenLLMetry tracing initialized")
+    # Return a standard OpenTelemetry tracer for creating manual spans.
+    return trace.get_tracer("prompt_optimizer")
+
+
+# Initialize tracing once, when the module is first imported.
+tracer_instance = initialize_tracing()
\ No newline at end of file
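The `evaluate_crew.py` and `optimize_crew.py` hunks above add the `tracer_instance` import but do not yet show it being used. A minimal sketch of how a crew module could wrap its kickoff in a span, mirroring the pattern used in `main.py`, is shown below; the `run_crew_with_tracing` helper and its attribute keys are illustrative and not part of the existing codebase.

```python
# Sketch only: applies the span pattern from main.py inside a crew module.
# The helper name `run_crew_with_tracing` and the attribute keys are hypothetical.
from prompt_optimizer.tracing import tracer_instance


def run_crew_with_tracing(crew, inputs: dict):
    """Run a CrewAI crew inside an OpenTelemetry span and record basic attributes."""
    with tracer_instance.start_as_current_span("crew_kickoff") as span:
        # Record the inputs so runs are easy to filter in the Traceloop UI.
        for key, value in inputs.items():
            span.set_attribute(f"input.{key}", str(value))

        result = crew.kickoff(inputs=inputs)

        # CrewAI exposes the final text output on `result.raw`.
        span.set_attribute("output", result.raw)
        return result
```

Keeping the span setup in one helper like this avoids repeating it in each crew class; the inline `with` blocks in `main.py` work the same way.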
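As an alternative to the manual span that `kickoff()` opens in `main.py`, traceloop-sdk also ships `@workflow` and `@task` decorators that create equivalent spans and group the LLM calls made inside them. The sketch below is meant as a drop-in for `kickoff()` in `src/prompt_optimizer/main.py`, where `PromptOptimizationFlow` is already defined; it is not part of the diff above.

```python
# Alternative sketch: let traceloop-sdk's workflow decorator create the top-level span
# instead of opening one manually with tracer_instance.
from traceloop.sdk.decorators import workflow


@workflow(name="prompt_optimization_flow")
def kickoff():
    # LLM and tool calls made while this function runs are grouped under the
    # "prompt_optimization_flow" workflow span in Traceloop.
    prompt_flow = PromptOptimizationFlow()
    prompt_flow.kickoff()
```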