From 18249499406d2882191944d7bc5d5d1042f461e9 Mon Sep 17 00:00:00 2001
From: Matthew Lenhard
Date: Fri, 16 May 2025 13:51:01 -0400
Subject: [PATCH 1/2] feat - tests + evals

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):
- Line structure was rebuilt from a whitespace-collapsed copy. Blank context
  lines in the README hunks and the JSON indentation are inferred from the
  hunk line counts -- TODO confirm against the target tree before applying.
- src/evals/evals.ts builds every eval through one createEval helper, shares a
  single model instance, and declares the eval list once so the default
  export and the named `evals` export cannot drift apart.
- package.json keeps its trailing newline; src/evals/evals.ts ends with one.
- The "index" blob ids for package.json and src/evals/evals.ts are carried
  over from the original patch and were not recomputed.
- src/evals/evals.ts imports "@ai-sdk/openai"; the package.json hunk below
  does not add it -- TODO confirm it is already declared.

 README.md          |  9 ++++++++
 package.json       |  3 ++-
 src/evals/evals.ts | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 src/evals/evals.ts

diff --git a/README.md b/README.md
index d412735..892a718 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,15 @@ If you see version numbers for all three, you are ready to proceed with the inst
 ## Installation
 
 
+
+## Running evals
+
+The evals package loads an mcp client that then runs the index.ts file, so there is no need to rebuild between tests. You can load environment variables by prefixing the npx command. Full documentation can be found [here](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+```
+
 ### Running on Claude Desktop
 
 To configure Octagon MCP for Claude Desktop:
diff --git a/package.json b/package.json
index 3c79f0e..fba52fc 100644
--- a/package.json
+++ b/package.json
@@ -38,7 +38,8 @@
     "@modelcontextprotocol/sdk": "^1.0.0",
     "dotenv": "^16.3.1",
     "openai": "^4.20.1",
-    "zod": "^3.22.4"
+    "zod": "^3.22.4",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@types/node": "^20.10.0",
diff --git a/src/evals/evals.ts b/src/evals/evals.ts
new file mode 100644
index 0000000..c48cf5d
--- /dev/null
+++ b/src/evals/evals.ts
@@ -0,0 +1,56 @@
+//evals.ts
+
+import { openai } from "@ai-sdk/openai";
+import { grade } from "mcp-evals";
+import type { EvalConfig, EvalFunction } from "mcp-evals";
+
+/** Model handed to `grade` for every eval and to the exported config. */
+const model = openai("gpt-4");
+
+/**
+ * Builds an eval whose `run` passes `prompt` to `grade` and returns the
+ * JSON-parsed reply.
+ */
+function createEval(name: string, description: string, prompt: string): EvalFunction {
+  return {
+    name,
+    description,
+    run: async () => {
+      const result = await grade(model, prompt);
+      return JSON.parse(result);
+    },
+  };
+}
+
+/** Every eval in this file; the default-exported config reuses this list. */
+export const evals: EvalFunction[] = [
+  createEval(
+    "octagon-sec-agent Tool Evaluation",
+    "Evaluates the SEC filings analysis capabilities of the octagon-sec-agent",
+    "What was Apple's R&D expense as a percentage of revenue in their latest fiscal year?",
+  ),
+  createEval(
+    "octagon-transcripts-agent Evaluation",
+    "Evaluates the accuracy and completeness of the octagon-transcripts-agent for analyzing earnings call transcripts",
+    "What did Amazon's CEO say about AWS growth expectations in the latest earnings call?",
+  ),
+  createEval(
+    "octagon-financials-agent Evaluation",
+    "Evaluates the financial analysis and ratio calculation capabilities of the octagon-financials-agent",
+    "Compare the gross margins, operating margins, and net margins of Apple, Microsoft, and Google over the last 3 years and provide insights on which company shows the strongest profitability trends.",
+  ),
+  createEval(
+    "Octagon Stock Data Agent Evaluation",
+    "Evaluates the performance of the Octagon Stock Data Agent for stock market data and valuation analysis",
+    "Compare Apple's stock performance to the S&P 500 over the last 6 months, including any significant events or catalysts that influenced price movements.",
+  ),
+  createEval(
+    "octagon-companies-agent Evaluation",
+    "Evaluates the specialized private market intelligence tool for company info lookups and financials",
+    "List the top 5 companies in the AI sector by revenue growth",
+  ),
+];
+
+const config: EvalConfig = { model, evals };
+
+export default config;

From 837eae978a4896fb8d84f24b1e55f5666608bf22 Mon Sep 17 00:00:00 2001
From: mat lenhard
Date: Sun, 18 May 2025 08:48:39 -0400
Subject: [PATCH 2/2] fix readme spacing and eval location

---
 README.md | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 892a718..b11b142 100644
--- a/README.md
+++ b/README.md
@@ -73,16 +73,6 @@ If you see version numbers for all three, you are ready to proceed with the inst
 
 ## Installation
 
-
-
-## Running evals
-
-The evals package loads an mcp client that then runs the index.ts file, so there is no need to rebuild between tests. You can load environment variables by prefixing the npx command. Full documentation can be found [here](https://www.mcpevals.io/docs).
-
-```bash
-OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
-```
-
 ### Running on Claude Desktop
 
 To configure Octagon MCP for Claude Desktop:
@@ -285,6 +275,14 @@ Research the financial impact of Apple's privacy changes on digital advertising
 2. **Connection Issues**: Make sure the connectivity to the Octagon API is working properly.
 3. **Rate Limiting**: If you encounter rate limiting errors, reduce the frequency of your requests.
 
+## Running Evals
+
+The evals package loads an mcp client that then runs the index.ts file, so there is no need to rebuild between tests. You can load environment variables by prefixing the npx command. Full documentation can be found [here](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key npx mcp-eval src/evals/evals.ts src/index.ts
+```
+
 ## Installation
 
 ### Running with npx