Skip to content

Commit be7bb65

Browse files
authored
Merge pull request designcomputer#41 from mclenhard/feat/add-e2e-test-and-evals
feat - e2e tests + evals
2 parents d678631 + 8e44178 commit be7bb65

File tree

4 files changed

+6097
-0
lines changed

4 files changed

+6097
-0
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,15 @@ pnpm run setup:test:db
470470
pnpm test
471471
```
472472

473+
474+
475+
## Running evals
476+
477+
The evals package starts an MCP client that runs the `index.ts` file directly, so there is no need to rebuild between eval runs. You can set environment variables by prefixing the `npx` command with them, as shown below. Full documentation is available in the [mcp-evals docs](https://www.mcpevals.io/docs).
478+
479+
```bash
480+
OPENAI_API_KEY=your-key npx mcp-eval evals.ts index.ts
481+
```
473482
## Troubleshooting
474483

475484
### Common Issues

evals.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//evals.ts
2+
3+
import { EvalConfig } from 'mcp-evals';
4+
import { openai } from "@ai-sdk/openai";
5+
import { grade, EvalFunction } from "mcp-evals";
6+
7+
// Each evaluation below has its own binding. Declaring the same `const` name
// more than once in a module is a compile-time error, and reusing one binding
// would make the eval list contain a single evaluation repeated three times.

/**
 * Asks the model to execute a fixed SELECT against the `employees` table.
 * The value returned by `grade` is passed through `JSON.parse`, so it is
 * presumably a JSON-encoded string — confirm against the mcp-evals docs.
 */
const mysqlQueryExecutionEval: EvalFunction = {
  name: 'mysql_query Tool Evaluation',
  description: 'Evaluates the MySQL query execution functionality',
  run: async () => {
    const result = await grade(openai("gpt-4"), "Please execute the following SQL query and return the results: SELECT * FROM employees WHERE status='ACTIVE';");
    return JSON.parse(result);
  }
};

/**
 * Asks the model to use the `mysql_query` tool to select the active rows of
 * the `users` table and to report the SQL it used.
 */
const mysqlQueryFilterEval: EvalFunction = {
  name: 'mysql_query Tool Evaluation',
  description: 'Evaluates the MySQL query tool for correct SQL generation and execution',
  run: async () => {
    const result = await grade(openai("gpt-4"), "Use the mysql_query tool to select all rows from the 'users' table where isActive = 1. Provide the SQL query in the correct format.");
    return JSON.parse(result);
  }
};

/**
 * Asks the model for a query that projects the `id`, `name` and `email`
 * columns from the `users` table.
 */
const mysqlQueryColumnsEval: EvalFunction = {
  name: 'mysql_queryEval',
  description: 'Evaluates the mysql_query tool',
  run: async () => {
    const result = await grade(openai("gpt-4"), "Please provide a SQL query to retrieve the id, name, and email columns for all records in the users table.");
    return JSON.parse(result);
  }
};

/**
 * All evaluations defined in this file. The list is defined once and shared
 * by the named export and the default config so the two cannot drift apart.
 */
export const evals = [
  mysqlQueryExecutionEval,
  mysqlQueryFilterEval,
  mysqlQueryColumnsEval
];

/** Eval configuration: the model to use and the evaluations to run. */
const config: EvalConfig = {
  model: openai("gpt-4"),
  evals
};

export default config;

0 commit comments

Comments
 (0)