Skip to content

fixed deepLocator() pathing bug #880

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/external_clients/google_vertex.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { Stagehand } from "@browserbasehq/stagehand";
import { z } from "zod";

/**
* Example of using Google Vertex AI directly (not through AI SDK).
* When you provide `vertexai: true` in the client options,
* the system will route to GoogleVertexClient instead of using AI SDK.
*/
/**
 * Runs the Vertex AI example end-to-end: boots Stagehand with Vertex
 * routing enabled, navigates to the Stagehand docs, extracts the main
 * heading, logs it, and shuts down.
 */
async function main() {
  // Vertex AI specific configuration - bypasses AI SDK.
  // Setting `vertexai: true` makes the system route to GoogleVertexClient.
  const vertexClientOptions = {
    vertexai: true,
    project: "your-gcp-project-id",
    location: "us-central1",
    // Optional: API key if not using default auth
    // apiKey: process.env.GOOGLE_API_KEY,
  };

  const stagehand = new Stagehand({
    env: "LOCAL",
    enableCaching: false,
    // Google model with slash notation
    modelName: "google/gemini-1.5-pro",
    modelClientOptions: vertexClientOptions,
  });

  await stagehand.init();
  await stagehand.page.goto("https://docs.stagehand.dev");

  // Extract some text using Vertex AI (not AI SDK)
  const headingSchema = z.object({ heading: z.string() });
  const result = await stagehand.page.extract({
    instruction: "extract the main heading of this page",
    schema: headingSchema,
  });

  console.log("Extracted:", result);
  await stagehand.close();
}

main().catch(console.error);
42 changes: 33 additions & 9 deletions lib/StagehandPage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -652,9 +652,17 @@ ${scriptContent} \
}

const requestId = Math.random().toString(36).substring(2);
const llmClient: LLMClient = modelName
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.llmClient;

// Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient
const llmClient =
modelName && modelClientOptions?.apiKey
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.stagehand.llmClient;

// Add null check for llmClient before accessing modelName
if (!llmClient) {
throw new MissingLLMConfigurationError();
}

this.stagehand.log({
category: "act",
Expand Down Expand Up @@ -746,9 +754,17 @@ ${scriptContent} \
}

const requestId = Math.random().toString(36).substring(2);
const llmClient = modelName
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.llmClient;

// Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient
const llmClient =
modelName && modelClientOptions?.apiKey
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.stagehand.llmClient;

// Add null check for llmClient before accessing modelName
if (!llmClient) {
throw new MissingLLMConfigurationError();
}

this.stagehand.log({
category: "extract",
Expand Down Expand Up @@ -850,9 +866,17 @@ ${scriptContent} \
}

const requestId = Math.random().toString(36).substring(2);
const llmClient = modelName
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.llmClient;

// Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient
const llmClient =
modelName && modelClientOptions?.apiKey
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
: this.stagehand.llmClient;

// Add null check for llmClient before accessing modelName
if (!llmClient) {
throw new MissingLLMConfigurationError();
}

this.stagehand.log({
category: "observe",
Expand Down
45 changes: 32 additions & 13 deletions lib/handlers/handlerUtils/actHandlerUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,51 @@ import { StagehandClickError } from "@/types/stagehandErrors";

const IFRAME_STEP_RE = /^iframe(\[[^\]]+])?$/i;

export function deepLocator(root: Page | FrameLocator, xpath: string): Locator {
// 1 ─ prepend with slash if not already included
if (!xpath.startsWith("/")) xpath = "/" + xpath;
export function deepLocator(
root: Page | FrameLocator,
rawXPath: string,
): Locator {
// 1 ─ strip optional 'xpath=' prefix and whitespace
const xpath = rawXPath.replace(/^xpath=/i, "").trim();

// Split the path by sequences of slashes, but keep the slashes as
// separate elements in the array. This preserves separators like '//'.
// e.g., '//a/b' becomes ['//', 'a', '/', 'b']
const parts = xpath.split(/(\/+)/).filter(Boolean);

// 2 ─ split into steps, accumulate until we hit an iframe step
const steps = xpath.split("/").filter(Boolean); // tokens
let ctx: Page | FrameLocator = root;
let buffer: string[] = [];

const flushIntoFrame = () => {
if (buffer.length === 0) return;
const selector = "xpath=/" + buffer.join("/");

// Join the buffered parts to form the selector for the iframe.
// .join('') is used because the separators are already in the buffer.
const selector = "xpath=" + buffer.join("");
ctx = (ctx as Page | FrameLocator).frameLocator(selector);
buffer = [];
buffer = []; // Reset buffer for the next path segment.
};

for (const step of steps) {
buffer.push(step);
if (IFRAME_STEP_RE.test(step)) {
// we've included the <iframe> element in buffer ⇒ descend
// Iterate through all parts (which include both steps and their separators).
for (const part of parts) {
buffer.push(part);

// A "step" is a part of the path that is NOT a separator.
// We test if the current step (a non-slash part) is an iframe locator.
const isStep = !/^\/+$/.test(part);
if (isStep && IFRAME_STEP_RE.test(part)) {
flushIntoFrame();
}
}

// 3 ─ whatever is left in buffer addresses the target *inside* the last ctx
const finalSelector = "xpath=/" + buffer.join("/");
// If the XPath ended with an iframe, the buffer will be empty. In this case,
// we return a locator for the root element ('*') within the final frame.
if (buffer.length === 0) {
return (ctx as Page | FrameLocator).locator("xpath=/*");
}

// Join the remaining parts for the final locator.
const finalSelector = "xpath=" + buffer.join("");
return (ctx as Page | FrameLocator).locator(finalSelector);
}

Expand Down
17 changes: 12 additions & 5 deletions lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -583,23 +583,30 @@ export class Stagehand {

let modelApiKey: string | undefined;

if (!modelClientOptions?.apiKey) {
const usingVertexAI =
"vertexai" in modelClientOptions && modelClientOptions.vertexai;
const provider = LLMProvider.getModelProvider(
this.modelName,
usingVertexAI,
);

if (!modelClientOptions?.apiKey && !usingVertexAI) {
// If no API key is provided, try to load it from the environment
if (LLMProvider.getModelProvider(this.modelName) === "aisdk") {
if (provider === "aisdk") {
modelApiKey = loadApiKeyFromEnv(
this.modelName.split("/")[0],
this.logger,
);
} else {
// Temporary add for legacy providers
modelApiKey =
LLMProvider.getModelProvider(this.modelName) === "openai"
provider === "openai"
? process.env.OPENAI_API_KEY ||
this.llmClient?.clientOptions?.apiKey
: LLMProvider.getModelProvider(this.modelName) === "anthropic"
: provider === "anthropic"
? process.env.ANTHROPIC_API_KEY ||
this.llmClient?.clientOptions?.apiKey
: LLMProvider.getModelProvider(this.modelName) === "google"
: provider === "google"
? process.env.GOOGLE_API_KEY ||
this.llmClient?.clientOptions?.apiKey
: undefined;
Expand Down
39 changes: 22 additions & 17 deletions lib/llm/GoogleClient.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,33 @@
import {
Content,
FunctionCall,
GoogleGenAI,
HarmCategory,
GoogleGenAIOptions,
HarmBlockThreshold,
Content,
HarmCategory,
Part,
Tool,
FunctionCall,
Schema,
Tool,
Type,
} from "@google/genai";
import zodToJsonSchema from "zod-to-json-schema";

import {
CreateChatCompletionResponseError,
StagehandError,
} from "@/types/stagehandErrors";
import { LogLine } from "../../types/log";
import { AvailableModel, ClientOptions } from "../../types/model";
import { LLMCache } from "../cache/LLMCache";
import { validateZodSchema, toGeminiSchema, loadApiKeyFromEnv } from "../utils";
import { toGeminiSchema, validateZodSchema } from "../utils";
import {
AnnotatedScreenshotText,
ChatCompletionOptions,
ChatMessage,
CreateChatCompletionOptions,
LLMClient,
LLMResponse,
AnnotatedScreenshotText,
} from "./LLMClient";
import {
CreateChatCompletionResponseError,
StagehandError,
} from "@/types/stagehandErrors";

// Mapping from generic roles to Gemini roles
const roleMap: { [key in ChatMessage["role"]]: string } = {
Expand Down Expand Up @@ -60,7 +61,7 @@ export class GoogleClient extends LLMClient {
private client: GoogleGenAI;
private cache: LLMCache | undefined;
private enableCaching: boolean;
public clientOptions: ClientOptions;
public clientOptions: GoogleGenAIOptions;
public hasVision: boolean;
private logger: (message: LogLine) => void;

Expand All @@ -78,12 +79,16 @@ export class GoogleClient extends LLMClient {
clientOptions?: ClientOptions; // Expecting { apiKey: string } here
}) {
super(modelName);
if (!clientOptions?.apiKey) {
// Try to get the API key from the environment variable GOOGLE_API_KEY
clientOptions.apiKey = loadApiKeyFromEnv("google_legacy", logger);
}
this.clientOptions = clientOptions;
this.client = new GoogleGenAI({ apiKey: clientOptions.apiKey });
this.clientOptions = clientOptions as GoogleGenAIOptions;
this.client = new GoogleGenAI({
apiKey: this.clientOptions.apiKey,
vertexai: this.clientOptions.vertexai,
project: this.clientOptions.project,
location: this.clientOptions.location,
apiVersion: this.clientOptions.apiVersion,
googleAuthOptions: this.clientOptions.googleAuthOptions,
httpOptions: this.clientOptions.httpOptions,
});
this.cache = cache;
this.enableCaching = enableCaching;
this.modelName = modelName;
Expand Down
50 changes: 50 additions & 0 deletions lib/llm/GoogleVertexClient.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { GoogleGenAIOptions } from "@google/genai";
import { LogLine } from "../../types/log";
import { AvailableModel, ClientOptions } from "../../types/model";
import { LLMCache } from "../cache/LLMCache";
import { GoogleClient } from "./GoogleClient";

/**
 * Client options required to route Gemini requests through Google
 * Vertex AI. Extends the SDK's GoogleGenAIOptions with the fields the
 * Vertex client validates at construction time.
 */
export interface GoogleVertexClientOptions extends GoogleGenAIOptions {
  /** Must be truthy to opt into Vertex AI routing. */
  vertexai: boolean;
  /** GCP project ID the Vertex AI requests are billed against. */
  project: string;
  /** GCP region for the Vertex AI endpoint, e.g. "us-central1". */
  location: string;
}

/**
 * LLM client that routes requests through Google Vertex AI. Thin wrapper
 * around GoogleClient: it validates the Vertex-specific options
 * (`vertexai`, `project`, `location`) up front, then delegates to the
 * base class with `vertexai` forced on.
 */
export class GoogleVertexClient extends GoogleClient {
  constructor({
    logger,
    enableCaching = false,
    cache,
    modelName,
    clientOptions,
  }: {
    logger: (message: LogLine) => void;
    enableCaching?: boolean;
    cache?: LLMCache;
    modelName: AvailableModel;
    clientOptions?: ClientOptions;
  }) {
    const options = clientOptions as GoogleVertexClientOptions;

    // Fail fast when the Vertex-specific configuration is incomplete.
    const ensure = (ok: unknown, message: string): void => {
      if (!ok) throw new Error(message);
    };
    ensure(
      options?.vertexai,
      "GoogleVertexClient requires vertexai option to be true",
    );
    ensure(options?.project, "GoogleVertexClient requires project configuration");
    ensure(
      options?.location,
      "GoogleVertexClient requires location configuration",
    );

    super({
      logger,
      enableCaching,
      cache,
      modelName,
      // Spread first, then pin vertexai so the flag cannot be overridden.
      clientOptions: { ...options, vertexai: true },
    });
  }
}
Loading