Skip to content

Commit f369529

Browse files
feat: add support for Google LLM provider and update environment configuration
1 parent ceefad7 commit f369529

File tree

10 files changed: 87 additions and 33 deletions.

example.env

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1+
LLM_PROVIDER=google
12
OPENAI_API_KEY=OPENAI_API_KEY
2-
LLM_MODEL=gpt-4o-mini
3+
AUTOBROWSE_LLM_MODEL=gpt-4o-mini
4+
GOOGLE_API_KEY=GOOGLE_API_KEY

package-lock.json

Lines changed: 26 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"license": "MIT",
4040
"dependencies": {
4141
"@langchain/core": "^0.3.43",
42+
"@langchain/google-genai": "^0.2.1",
4243
"@langchain/langgraph": "^0.2.60",
4344
"@langchain/openai": "^0.5.0",
4445
"@modelcontextprotocol/sdk": "^1.8.0",
@@ -48,8 +49,8 @@
4849
"zod": "^3.24.2"
4950
},
5051
"devDependencies": {
51-
"@types/node": "^20.0.0",
5252
"@types/dotenv": "^6.1.1",
53+
"@types/node": "^20.0.0",
5354
"@typescript-eslint/eslint-plugin": "^6.0.0",
5455
"@typescript-eslint/parser": "^6.0.0",
5556
"eslint": "^8.0.0",

src/auto.ts

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,16 @@
11
import { test as base } from '@playwright/test';
22
import { AutoConfig } from './types';
33
import { sessionManager, context } from './browser';
4-
import { ChatOpenAI } from "@langchain/openai";
54
import { createReactAgent } from "@langchain/langgraph/prebuilt";
65
import { HumanMessage } from "@langchain/core/messages";
7-
import dotenv from 'dotenv';
6+
import { createLLMModel } from './llm';
87
import {
98
browser_click, browser_type, browser_get_text, browser_navigate, browser_snapshot,
109
browser_hover, browser_drag, browser_select_option, browser_take_screenshot,
1110
browser_go_back, browser_wait, browser_press_key, browser_save_pdf, browser_choose_file,
1211
browser_go_forward, browser_assert
1312
} from './tools';
1413

15-
// Load environment variables
16-
dotenv.config();
17-
const openai_llm_model = process.env.LLM_MODEL || 'gpt-4o-mini';
18-
19-
const openai_model = new ChatOpenAI({
20-
modelName: openai_llm_model,
21-
temperature: 0,
22-
});
23-
2414
// Extend base test to automatically track page
2515
export const test = base.extend({
2616
page: async ({ page }, use) => {
@@ -31,7 +21,7 @@ export const test = base.extend({
3121

3222
// Initialize the LangChain agent with more detailed instructions
3323
const initializeAgent = () => {
34-
const model = openai_model;
24+
const model = createLLMModel();
3525

3626
const prompt =
3727
`You are a web automation assistant. When given a natural language instruction:
@@ -67,8 +57,6 @@ const initializeAgent = () => {
6757
return { agent };
6858
};
6959

70-
71-
7260
// Main auto function that processes instructions
7361
export async function auto(instruction: string, config?: AutoConfig): Promise<any> {
7462
console.log(`[Auto] Processing instruction: "${instruction}"`);

src/llm.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import { ChatOpenAI } from "@langchain/openai";
2+
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
3+
import dotenv from 'dotenv';
4+
5+
// Load environment variables
6+
dotenv.config();
7+
8+
/**
 * Builds the chat model used by the automation agent from environment
 * configuration.
 *
 * Environment variables read:
 * - `LLM_PROVIDER`: `google` selects Gemini; anything else (or unset) selects
 *   OpenAI. Matching ignores case and surrounding whitespace.
 * - `AUTOBROWSE_LLM_MODEL`: model name to request. Falls back to the legacy
 *   `LLM_MODEL` variable, then to a default that belongs to the selected
 *   provider.
 *
 * API keys are not handled here; the underlying clients are expected to pick
 * them up from their own environment variables.
 *
 * @returns A chat model instance configured with temperature 0.
 */
export function createLLMModel(): ChatOpenAI | ChatGoogleGenerativeAI {
  // Default model per provider, so that switching LLM_PROVIDER without also
  // setting a model never sends an OpenAI model name to the Gemini API.
  // NOTE(review): confirm 'gemini-2.0-flash' is the preferred Gemini default.
  const defaultModels = {
    openai: 'gpt-4o-mini',
    google: 'gemini-2.0-flash',
  } as const;

  // Normalise so values such as "Google" or " google " are still recognised.
  const requestedProvider =
    (process.env.LLM_PROVIDER ?? '').trim().toLowerCase() || 'openai';
  const provider: keyof typeof defaultModels =
    requestedProvider === 'google' ? 'google' : 'openai';

  if (requestedProvider !== provider) {
    // Keep the historical fallback to OpenAI, but make the misconfiguration visible.
    console.warn(
      `[LLM] Unknown LLM_PROVIDER "${requestedProvider}", falling back to "openai"`
    );
  }

  // `||` (not `??`) is deliberate: an empty value in .env should behave as unset.
  const configuredModel = (
    process.env.AUTOBROWSE_LLM_MODEL ||
    process.env.LLM_MODEL ||
    ''
  ).trim();
  const model = configuredModel || defaultModels[provider];

  if (provider === 'google') {
    return new ChatGoogleGenerativeAI({
      model,
      // Match the OpenAI branch: deterministic output for browser automation.
      temperature: 0,
    });
  }

  return new ChatOpenAI({
    model,
    temperature: 0,
  });
}

src/tools/browser_go_back.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@ import { context } from '../browser/context';
55

66
/**
77
* Schema for going back in browser history
8+
* Includes dummy property to satisfy Gemini's API requirement for non-empty object properties
89
*/
9-
const goBackSchema = z.object({});
10+
const goBackSchema = z.object({
11+
_: z.string().optional().describe('No parameters required for this operation')
12+
});
1013

1114
export const browser_go_back = tool(
1215
async () => {

src/tools/browser_go_forward.ts

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,42 @@ import { runAndWait } from './utils';
44
import { context } from '../browser/context';
55

66
/**
7-
* Schema for going back in browser history
7+
* Schema for going forward in browser history
8+
* Includes dummy property to satisfy Gemini's API requirement for non-empty object properties
89
*/
9-
const goBackSchema = z.object({});
10+
const goForwardSchema = z.object({
11+
_: z.string().optional().describe('No parameters required for this operation')
12+
});
1013

1114
export const browser_go_forward = tool(
1215
async () => {
1316
try
1417
{
15-
console.log(`[Go Back Tool] Starting operation`);
18+
console.log(`[Go Forward Tool] Starting operation`);
1619

1720
const result = await runAndWait(
1821
context,
19-
'Navigated back',
22+
'Navigated forward',
2023
async (page) => {
21-
console.log(`[Go Back Tool] Going back to previous page`);
24+
console.log(`[Go Forward Tool] Going forward to next page`);
2225
await page.goForward();
23-
console.log(`[Go Back Tool] Operation successful`);
26+
console.log(`[Go Forward Tool] Operation successful`);
2427
},
2528
true
2629
);
2730

28-
console.log(`[Go Back Tool] Operation completed`);
31+
console.log(`[Go Forward Tool] Operation completed`);
2932
return result;
3033
} catch (error)
3134
{
32-
const errorMessage = `Failed to go back: ${error instanceof Error ? error.message : 'Unknown error'}`;
33-
console.error(`[Go Back Tool] Error:`, errorMessage);
35+
const errorMessage = `Failed to go forward: ${error instanceof Error ? error.message : 'Unknown error'}`;
36+
console.error(`[Go Forward Tool] Error:`, errorMessage);
3437
return errorMessage;
3538
}
3639
},
3740
{
38-
name: "goBack",
39-
description: "Go back to the previous page",
40-
schema: goBackSchema
41+
name: "goForward",
42+
description: "Go forward to the next page",
43+
schema: goForwardSchema
4144
}
4245
);

src/tools/browser_save_pdf.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@ import os from 'os';
55
import path from 'path';
66

77
/**
8-
* Schema for saving page as PDF
8+
* Schema for saving PDF
9+
* Includes dummy property to satisfy Gemini's API requirement for non-empty object properties
910
*/
10-
const pdfSchema = z.object({});
11+
const pdfSchema = z.object({
12+
_: z.string().optional().describe('No parameters required for this operation')
13+
});
1114

1215
export const browser_save_pdf = tool(
1316
async () => {

src/tools/browser_snapshot.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@ import { z } from "zod";
33
import { context } from '../browser/context';
44
import { captureAriaSnapshot } from './utils';
55

6-
const snapshotSchema = z.object({});
6+
/**
7+
* Schema with dummy property to satisfy Gemini's API requirement for non-empty object properties
8+
*/
9+
const snapshotSchema = z.object({
10+
_: z.string().optional().describe('No parameters required for this operation')
11+
});
712

813
export const browser_snapshot = tool(
914
async () => {

src/tools/browser_type.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const typeSchema = z.object({
1010
element: z.string().describe('Human-readable element description for the target field'),
1111
ref: z.string().describe('Element reference from page snapshot to locate the field'),
1212
text: z.string().describe('The text to type into the element'),
13-
submit: z.boolean().optional().describe('Whether to submit by pressing Enter after typing').default(false)
13+
submit: z.boolean().optional().describe('Whether to submit by pressing Enter after typing')
1414
});
1515

1616
export const browser_type = tool(

0 commit comments

Comments
 (0)