From f20f2ac0d8139494f463cabc3fc1c44836f347b5 Mon Sep 17 00:00:00 2001 From: Zach Loza Date: Tue, 8 Jul 2025 13:24:56 -0700 Subject: [PATCH 1/3] Add better handling for native dropdowns --- stagehand/handlers/act_handler_utils.py | 22 +++++++ stagehand/llm/prompts.py | 4 +- tests/e2e/test_act_integration.py | 86 +++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) diff --git a/stagehand/handlers/act_handler_utils.py b/stagehand/handlers/act_handler_utils.py index 24950eb..0455820 100644 --- a/stagehand/handlers/act_handler_utils.py +++ b/stagehand/handlers/act_handler_utils.py @@ -339,6 +339,27 @@ async def press_key(ctx: MethodHandlerContext) -> None: raise e +async def select_option(ctx: MethodHandlerContext) -> None: + try: + text = str(ctx.args[0]) if ctx.args and ctx.args[0] is not None else "" + await ctx.locator.select_option(text, timeout=5_000) + except Exception as e: + ctx.logger.error( + message="error selecting option", + category="action", + auxiliary={ + "error": {"value": str(e), "type": "string"}, + "trace": { + "value": getattr(e, "__traceback__", ""), + "type": "string", + }, + "xpath": {"value": ctx.xpath, "type": "string"}, + "args": {"value": json.dumps(ctx.args), "type": "object"}, + }, + ) + raise e + + async def click_element(ctx: MethodHandlerContext) -> None: ctx.logger.debug( message=f"page URL before click {ctx.stagehand_page._page.url}", @@ -500,4 +521,5 @@ async def handle_possible_page_navigation( "click": click_element, "nextChunk": scroll_to_next_chunk, "prevChunk": scroll_to_previous_chunk, + "selectOptionFromDropdown": select_option, } diff --git a/stagehand/llm/prompts.py b/stagehand/llm/prompts.py index f1d20f0..5080a85 100644 --- a/stagehand/llm/prompts.py +++ b/stagehand/llm/prompts.py @@ -200,7 +200,9 @@ def build_act_observe_prompt( ONLY return one action. If multiple actions are relevant, return the most relevant one. If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc. If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here. -If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.""" +If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys. +If the action implies choosing an option from a dropdown, AND the corresponding element is a 'select' element, choose the selectOptionFromDropdown method. The argument should be the text of the option to select. +If the action implies choosing an option from a dropdown, and the corresponding element is NOT a 'select' element, choose the click method.""" if variables and len(variables) > 0: variables_prompt = f"The following variables are available to use in the action: {', '.join(variables.keys())}. Fill the argument variables with the variable name." diff --git a/tests/e2e/test_act_integration.py b/tests/e2e/test_act_integration.py index c6eb4d4..c86c4b1 100644 --- a/tests/e2e/test_act_integration.py +++ b/tests/e2e/test_act_integration.py @@ -109,6 +109,92 @@ async def test_form_filling_browserbase(self, browserbase_stagehand): assert filled_name is not None assert len(filled_name) > 0 + @pytest.mark.asyncio + @pytest.mark.local + async def test_selecting_option_local(self, local_stagehand): + """Test option selecting capability in LOCAL mode""" + stagehand = local_stagehand + + # Navigate to a page with a form containing a dropdown + await stagehand.page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/") + + # Select an option from the dropdown. + await stagehand.page.act("Choose 'Smog Check Technician' from the 'License Type' dropdown") + + # Verify the selected option. + selected_option = await stagehand.page.locator( + "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked" + ).text_content() + + assert selected_option == "Smog Check Technician" + + @pytest.mark.asyncio + @pytest.mark.browserbase + @pytest.mark.skipif( + not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")), + reason="Browserbase credentials not available" + ) + async def test_selecting_option_browserbase(self, browserbase_stagehand): + """Test option selecting capability in BROWSERBASE mode""" + stagehand = browserbase_stagehand + + # Navigate to a page with a form containing a dropdown + await stagehand.page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/") + + # Select an option from the dropdown. + await stagehand.page.act("Choose 'Smog Check Technician' from the 'License Type' dropdown") + + # Verify the selected option. + selected_option = await stagehand.page.locator( + "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked" + ).text_content() + + assert selected_option == "Smog Check Technician" + + @pytest.mark.asyncio + @pytest.mark.local + async def test_selecting_option_custom_input_local(self, local_stagehand): + """Test not selecting option on custom select input in LOCAL mode""" + stagehand = local_stagehand + + # Navigate to a page with a form containing a dropdown + await stagehand.page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/expand-dropdown/") + + # Select an option from the dropdown. + await stagehand.page.act("Click the 'Select a Country' dropdown") + + # Wait for dropdown to expand + await asyncio.sleep(1) + + # We are expecting stagehand to click the dropdown to expand it, and therefore + # the available options should now be contained in the full a11y tree. + + # To test, we'll grab the full a11y tree, and make sure it contains 'Green' + extraction = await stagehand.page.extract() + assert "Canada" in extraction.data + + @pytest.mark.asyncio + @pytest.mark.local + async def test_selecting_option_hidden_input_local(self, local_stagehand): + """Test not selecting option on hidden input in LOCAL mode""" + stagehand = local_stagehand + + # Navigate to a page with a form containing a dropdown + await stagehand.page.goto("https://browserbase.github.io/stagehand-eval-sites/sites/hidden-input-dropdown/") + + # Select an option from the dropdown. + await stagehand.page.act("Click to expand the 'Favourite Colour' dropdown") + + # Wait for dropdown to expand + await asyncio.sleep(1) + + # We are expecting stagehand to click the dropdown to expand it, and therefore + # the available options should now be contained in the full a11y tree. + + # To test, we'll grab the full a11y tree, and make sure it contains 'Green' + extraction = await stagehand.page.extract() + assert "Green" in extraction.data + @pytest.mark.asyncio @pytest.mark.local async def test_button_clicking_local(self, local_stagehand): From c9ff033404a63caeb623f18f4784f03b22de7ff2 Mon Sep 17 00:00:00 2001 From: Zach Loza Date: Tue, 8 Jul 2025 13:36:53 -0700 Subject: [PATCH 2/3] Update comment --- tests/e2e/test_act_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/test_act_integration.py b/tests/e2e/test_act_integration.py index c86c4b1..221dea8 100644 --- a/tests/e2e/test_act_integration.py +++ b/tests/e2e/test_act_integration.py @@ -169,7 +169,7 @@ async def test_selecting_option_custom_input_local(self, local_stagehand): # We are expecting stagehand to click the dropdown to expand it, and therefore # the available options should now be contained in the full a11y tree. - # To test, we'll grab the full a11y tree, and make sure it contains 'Green' + # To test, we'll grab the full a11y tree, and make sure it contains 'Canada' extraction = await stagehand.page.extract() assert "Canada" in extraction.data From 775bf1b35de552785263f572287f5c010b07e783 Mon Sep 17 00:00:00 2001 From: Zach Loza Date: Mon, 14 Jul 2025 11:32:34 -0700 Subject: [PATCH 3/3] Add role replacement --- stagehand/a11y/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stagehand/a11y/utils.py b/stagehand/a11y/utils.py index 17d6359..c40cd91 100644 --- a/stagehand/a11y/utils.py +++ b/stagehand/a11y/utils.py @@ -95,6 +95,11 @@ async def _clean_structural_nodes( auxiliary={"error": {"value": str(resolve_error), "type": "string"}}, ) + # Rename `combobox` to `select` as they are functionally equivalent. + if node_role == "combobox": + node_role = "select" + node['role'] = node_role + # Remove redundant StaticText children cleaned_children = _remove_redundant_static_text_children(node, cleaned_children)