Skip to content

Commit 5e70c8d

Browse files
miguelg719 and Zach10za authored
Add better handling for native dropdowns (#140) (#147)
* Add better handling for native dropdowns (#140)
* Add better handling for native dropdowns
* Update comment
* Add role replacement
* patch and changeset
* clean up

---------

Co-authored-by: Zach Loza <Zach10za@users.noreply.github.com>
1 parent ca22d08 commit 5e70c8d

File tree

5 files changed

+119
-2
lines changed

5 files changed

+119
-2
lines changed

.changeset/amusing-ivory-termite.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"stagehand": patch
3+
---
4+
5+
Add support for handling OS-level dropdowns

stagehand/a11y/utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ async def _clean_structural_nodes(
5757
page
5858
and logger
5959
and backend_node_id is not None
60-
and node_role in ("generic", "none")
60+
and node_role in ("generic", "combobox", "none")
6161
):
6262
try:
6363
resolved_node = await page.send_cdp(
@@ -78,6 +78,8 @@ async def _clean_structural_nodes(
7878
)
7979
result_value = tag_name_result.get("result", {}).get("value")
8080
if result_value:
81+
if node_role == "combobox" and result_value == "select":
82+
result_value = "select"
8183
node["role"] = result_value
8284
node_role = result_value
8385
except Exception as tag_name_error:

stagehand/handlers/act_handler_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,27 @@ async def press_key(ctx: MethodHandlerContext) -> None:
339339
raise e
340340

341341

342+
async def select_option(ctx: MethodHandlerContext) -> None:
    """Select a dropdown option on the element behind ``ctx.locator``.

    The first entry of ``ctx.args`` is converted with ``str`` and handed to
    ``ctx.locator.select_option`` together with ``timeout=5_000``. When
    ``ctx.args`` is empty, or its first entry is ``None``, the empty string
    is passed instead.

    Args:
        ctx: Handler context; this function reads ``args``, ``locator``,
            ``logger`` and ``xpath`` from it.

    Raises:
        Exception: Any error raised while selecting the option is logged
            through ``ctx.logger.error`` and then re-raised unchanged.
    """
    # Local import: only needed on the failure path, and keeps this block
    # independent of the module's top-level import list.
    import traceback

    try:
        text = str(ctx.args[0]) if ctx.args and ctx.args[0] is not None else ""
        await ctx.locator.select_option(text, timeout=5_000)
    except Exception as e:
        ctx.logger.error(
            message="error selecting option",
            category="action",
            auxiliary={
                "error": {"value": str(e), "type": "string"},
                "trace": {
                    # The field is declared as a string, so log the formatted
                    # traceback text rather than the raw traceback object.
                    "value": traceback.format_exc(),
                    "type": "string",
                },
                "xpath": {"value": ctx.xpath, "type": "string"},
                "args": {
                    # default=str keeps logging from raising TypeError on
                    # non-JSON-serializable arguments, which would otherwise
                    # replace the original exception.
                    "value": json.dumps(ctx.args, default=str),
                    "type": "object",
                },
            },
        )
        # Bare raise re-raises the active exception with its traceback intact.
        raise
361+
362+
342363
async def click_element(ctx: MethodHandlerContext) -> None:
343364
ctx.logger.debug(
344365
message=f"page URL before click {ctx.stagehand_page._page.url}",
@@ -500,4 +521,5 @@ async def handle_possible_page_navigation(
500521
"click": click_element,
501522
"nextChunk": scroll_to_next_chunk,
502523
"prevChunk": scroll_to_previous_chunk,
524+
"selectOptionFromDropdown": select_option,
503525
}

stagehand/llm/prompts.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,9 @@ def build_act_observe_prompt(
200200
ONLY return one action. If multiple actions are relevant, return the most relevant one.
201201
If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.
202202
If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here.
203-
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys."""
203+
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.
204+
If the action implies choosing an option from a dropdown, AND the corresponding element is a 'select' element, choose the selectOptionFromDropdown method. The argument should be the text of the option to select.
205+
If the action implies choosing an option from a dropdown, and the corresponding element is NOT a 'select' element, choose the click method."""
204206

205207
if variables and len(variables) > 0:
206208
variables_prompt = f"The following variables are available to use in the action: {', '.join(variables.keys())}. Fill the argument variables with the variable name."

tests/e2e/test_act_integration.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,92 @@ async def test_form_filling_browserbase(self, browserbase_stagehand):
109109
assert filled_name is not None
110110
assert len(filled_name) > 0
111111

112+
@pytest.mark.asyncio
@pytest.mark.local
async def test_selecting_option_local(self, local_stagehand):
    """act() picks an option from a <select> dropdown in LOCAL mode."""
    site_url = "https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/"
    instruction = "Choose 'Smog Check Technician' from the 'License Type' dropdown"
    # Resolves to whichever <option> of the target <select> is currently checked.
    checked_option_selector = (
        "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked"
    )

    sh = local_stagehand

    # Open the page containing the dropdown, then ask stagehand to choose the option.
    await sh.page.goto(site_url)
    await sh.page.act(instruction)

    # Read the text of the checked option back from the page and compare.
    checked_option = sh.page.locator(checked_option_selector)
    chosen_text = await checked_option.text_content()

    assert chosen_text == "Smog Check Technician"
130+
131+
@pytest.mark.asyncio
@pytest.mark.browserbase
@pytest.mark.skipif(
    not (os.getenv("BROWSERBASE_API_KEY") and os.getenv("BROWSERBASE_PROJECT_ID")),
    reason="Browserbase credentials not available"
)
async def test_selecting_option_browserbase(self, browserbase_stagehand):
    """act() picks an option from a <select> dropdown in BROWSERBASE mode."""
    site_url = "https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/"
    instruction = "Choose 'Smog Check Technician' from the 'License Type' dropdown"
    # Resolves to whichever <option> of the target <select> is currently checked.
    checked_option_selector = (
        "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked"
    )

    sh = browserbase_stagehand

    # Open the page containing the dropdown, then ask stagehand to choose the option.
    await sh.page.goto(site_url)
    await sh.page.act(instruction)

    # Read the text of the checked option back from the page and compare.
    checked_option = sh.page.locator(checked_option_selector)
    chosen_text = await checked_option.text_content()

    assert chosen_text == "Smog Check Technician"
153+
154+
@pytest.mark.asyncio
@pytest.mark.local
async def test_selecting_option_custom_input_local(self, local_stagehand):
    """A custom (non-<select>) dropdown is expanded by act() in LOCAL mode.

    The test expects stagehand to click the dropdown open rather than select
    an option, so that the option labels become part of the extracted page
    content afterwards.
    """
    site_url = "https://browserbase.github.io/stagehand-eval-sites/sites/expand-dropdown/"
    instruction = "Click the 'Select a Country' dropdown"
    expected_option = "Canada"

    sh = local_stagehand

    await sh.page.goto(site_url)
    await sh.page.act(instruction)

    # Give the dropdown a moment to finish expanding.
    await asyncio.sleep(1)

    # Extract with no arguments (the full a11y tree, per the intent of this
    # test) and check that one of the dropdown's options is present in it.
    page_snapshot = await sh.page.extract()
    assert expected_option in page_snapshot.data
175+
176+
@pytest.mark.asyncio
@pytest.mark.local
async def test_selecting_option_hidden_input_local(self, local_stagehand):
    """A hidden-input dropdown is expanded by act() in LOCAL mode.

    The test expects stagehand to click the dropdown open rather than select
    an option, so that the option labels become part of the extracted page
    content afterwards.
    """
    site_url = "https://browserbase.github.io/stagehand-eval-sites/sites/hidden-input-dropdown/"
    instruction = "Click to expand the 'Favourite Colour' dropdown"
    expected_option = "Green"

    sh = local_stagehand

    await sh.page.goto(site_url)
    await sh.page.act(instruction)

    # Give the dropdown a moment to finish expanding.
    await asyncio.sleep(1)

    # Extract with no arguments (the full a11y tree, per the intent of this
    # test) and check that one of the dropdown's options is present in it.
    page_snapshot = await sh.page.extract()
    assert expected_option in page_snapshot.data
197+
112198
@pytest.mark.asyncio
113199
@pytest.mark.local
114200
async def test_button_clicking_local(self, local_stagehand):

0 commit comments

Comments
 (0)