Skip to content

Commit dbbae59

Browse files
authored
fix: browser navigator (#212)
1 parent 4b9bee5 commit dbbae59

File tree

5 files changed

+230
-138
lines changed

5 files changed

+230
-138
lines changed

apps/omega/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"asar:analyze": "asar extract out/Agent\\ TARS-darwin-arm64/Agent\\ TARS.app/Contents/Resources/app.asar ./dist/asar",
1414
"start": "electron-vite preview",
1515
"dev": "electron-vite dev",
16+
"prepare": "npm run build:reporter",
1617
"package": "electron-forge package",
1718
"build": "rimraf dist out && npm run typecheck && npm run build:reporter && electron-vite build && electron-forge make",
1819
"test": "vitest run",

packages/agent-infra/browser-use/assets/buildDomTree.js

+23-10
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ window.buildDomTree = (
153153
sibling = sibling.previousSibling;
154154
}
155155

156-
const tagName = currentElement.nodeName.toLowerCase();
156+
const tagName = currentElement.nodeName
157+
? currentElement.nodeName.toLowerCase()
158+
: null;
157159
const xpathIndex = index > 0 ? `[${index + 1}]` : '';
158160
segments.unshift(`${tagName}${xpathIndex}`);
159161

@@ -173,7 +175,7 @@ window.buildDomTree = (
173175

174176
let path = [];
175177
while (element && element.nodeType === Node.ELEMENT_NODE) {
176-
let selector = element.nodeName.toLowerCase();
178+
let selector = element.nodeName ? element.nodeName.toLowerCase() : null;
177179

178180
// if element has ID, use ID selector
179181
if (element.id) {
@@ -193,7 +195,11 @@ window.buildDomTree = (
193195
let sibling = element;
194196
let nth = 1;
195197
while ((sibling = sibling.previousElementSibling)) {
196-
if (sibling.nodeName.toLowerCase() === element.nodeName.toLowerCase())
198+
if (
199+
sibling.nodeName &&
200+
element.nodeName &&
201+
sibling.nodeName.toLowerCase() === element.nodeName.toLowerCase()
202+
)
197203
nth++;
198204
}
199205
if (nth > 1) selector += `:nth-of-type(${nth})`;
@@ -204,7 +210,8 @@ window.buildDomTree = (
204210
if (
205211
element.parentNode &&
206212
(element.parentNode.id ||
207-
element.parentNode.nodeName.toLowerCase() === 'body')
213+
(element.parentNode.nodeName &&
214+
element.parentNode.nodeName.toLowerCase() === 'body'))
208215
) {
209216
if (element.parentNode.id) {
210217
path.unshift(`#${element.parentNode.id}`);
@@ -228,13 +235,15 @@ window.buildDomTree = (
228235
'link',
229236
'meta',
230237
]);
231-
return !leafElementDenyList.has(element.tagName.toLowerCase());
238+
return !leafElementDenyList.has(
239+
element.tagName ? element.tagName.toLowerCase() : null,
240+
);
232241
}
233242

234243
// Helper function to check if element is interactive
235244
function isInteractiveElement(element) {
236245
// Immediately return false for body tag
237-
if (element.tagName.toLowerCase() === 'body') {
246+
if (element.tagName && element.tagName.toLowerCase() === 'body') {
238247
return false;
239248
}
240249

@@ -291,7 +300,7 @@ window.buildDomTree = (
291300
'combobox',
292301
]);
293302

294-
const tagName = element.tagName.toLowerCase();
303+
const tagName = element.tagName ? element.tagName.toLowerCase() : null;
295304
const role = element.getAttribute('role');
296305
const ariaRole = element.getAttribute('aria-role');
297306
const tabIndex = element.getAttribute('tabindex');
@@ -309,6 +318,7 @@ window.buildDomTree = (
309318
interactiveRoles.has(ariaRole) ||
310319
(tabIndex !== null &&
311320
tabIndex !== '-1' &&
321+
element.parentElement?.tagName &&
312322
element.parentElement?.tagName.toLowerCase() !== 'body') ||
313323
element.getAttribute('data-action') === 'a-dropdown-select' ||
314324
element.getAttribute('data-action') === 'a-dropdown-button';
@@ -398,8 +408,10 @@ window.buildDomTree = (
398408

399409
// Additional check to prevent body from being marked as interactive
400410
if (
401-
element.tagName.toLowerCase() === 'body' ||
402-
element.parentElement?.tagName.toLowerCase() === 'body'
411+
(element.tagName && element.tagName.toLowerCase() === 'body') ||
412+
(element.parentElement &&
413+
element.parentElement.tagName &&
414+
element.parentElement.tagName.toLowerCase() === 'body')
403415
) {
404416
return false;
405417
}
@@ -538,7 +550,8 @@ window.buildDomTree = (
538550
rect.height !== 0 &&
539551
rect.top >= 0 &&
540552
rect.top <= window.innerHeight &&
541-
textNode.parentElement?.checkVisibility({
553+
textNode.parentElement &&
554+
textNode.parentElement.checkVisibility({
542555
checkOpacity: true,
543556
checkVisibilityCSS: true,
544557
})

packages/agent-infra/browser-use/src/browser/page.ts

+7-112
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ import {
3333
type PageState,
3434
} from './types';
3535
import { createLogger, getBuildDomTreeScript } from '../utils';
36-
import { waitForPageAndFramesLoad } from './utils';
36+
import {
37+
locateElement,
38+
scrollIntoViewIfNeeded,
39+
waitForPageAndFramesLoad,
40+
} from './utils';
3741

3842
const logger = createLogger('Page');
3943

@@ -768,64 +772,7 @@ export default class Page {
768772
}
769773

770774
/**
 * Resolves a DOM tree node to a live element handle on the connected page.
 *
 * The lookup itself is delegated to the `locateElement` helper imported from
 * `./utils`, which receives the Puppeteer page, the node and this page's
 * config.
 *
 * @param element - DOM tree node to locate.
 * @returns The element handle returned by the helper, or `null` when no
 *   Puppeteer page is connected.
 */
async locateElement(element: DOMElementNode): Promise<ElementHandle | null> {
  // Guard explicitly rather than asserting non-null: the page may not be
  // connected, and the warning should go through this module's logger.
  if (!this._puppeteerPage) {
    logger.warning('Puppeteer is not connected');
    return null;
  }
  return locateElement(this._puppeteerPage, element, this._config);
}
830777

831778
async inputTextElementNode(
@@ -877,59 +824,7 @@ export default class Page {
877824
element: ElementHandle,
878825
timeout = 2500,
879826
): Promise<void> {
880-
const startTime = Date.now();
881-
882-
// eslint-disable-next-line no-constant-condition
883-
while (true) {
884-
// Check if element is in viewport
885-
const isVisible = await element.evaluate((el) => {
886-
const rect = el.getBoundingClientRect();
887-
888-
// Check if element has size
889-
if (rect.width === 0 || rect.height === 0) return false;
890-
891-
// Check if element is hidden
892-
const style = window.getComputedStyle(el);
893-
if (
894-
style.visibility === 'hidden' ||
895-
style.display === 'none' ||
896-
style.opacity === '0'
897-
) {
898-
return false;
899-
}
900-
901-
// Check if element is in viewport
902-
const isInViewport =
903-
rect.top >= 0 &&
904-
rect.left >= 0 &&
905-
rect.bottom <=
906-
(window.innerHeight || document.documentElement.clientHeight) &&
907-
rect.right <=
908-
(window.innerWidth || document.documentElement.clientWidth);
909-
910-
if (!isInViewport) {
911-
// Scroll into view if not visible
912-
el.scrollIntoView({
913-
behavior: 'auto',
914-
block: 'center',
915-
inline: 'center',
916-
});
917-
return false;
918-
}
919-
920-
return true;
921-
});
922-
923-
if (isVisible) break;
924-
925-
// Check timeout
926-
if (Date.now() - startTime > timeout) {
927-
throw new Error('Timed out while trying to scroll element into view');
928-
}
929-
930-
// Small delay before next check
931-
await new Promise((resolve) => setTimeout(resolve, 100));
932-
}
827+
await scrollIntoViewIfNeeded(element, timeout);
933828
}
934829

935830
async clickElementNode(

packages/agent-infra/browser-use/src/browser/utils.ts

+128-1
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,139 @@ import {
22
type HTTPRequest,
33
type HTTPResponse,
44
} from 'puppeteer-core/lib/esm/puppeteer/puppeteer-core-browser.js';
5-
import { Page as PuppeteerPage } from 'puppeteer-core';
5+
import { ElementHandle, Frame, Page as PuppeteerPage } from 'puppeteer-core';
66
import {
77
BrowserContextConfig,
88
DEFAULT_BROWSER_CONTEXT_CONFIG,
99
PartialWithRequired,
1010
} from './types';
11+
import { DOMElementNode } from '../dom/views';
12+
13+
/**
 * Polls an element until it is rendered inside the viewport, scrolling it
 * towards the viewport centre whenever it is not.
 *
 * Each poll runs in the page context and:
 *  - reports "not ready" (without scrolling) when the bounding box has zero
 *    width or height, or the computed style is `visibility: hidden`,
 *    `display: none` or `opacity: 0`;
 *  - reports "ready" when the bounding box is in view on both axes;
 *  - otherwise calls `scrollIntoView` centred on both axes and reports
 *    "not ready", so the next poll re-measures.
 *
 * "In view" on an axis normally means the box lies fully inside the viewport.
 * A box larger than the viewport on that axis can never satisfy this, so it is
 * accepted once it covers the viewport midpoint on that axis.
 *
 * @param element - Handle of the element to bring into view.
 * @param timeout - Polling budget in milliseconds. It is checked after each
 *   failed poll, so at least one poll always runs.
 * @returns Resolves once a poll reports the element as ready.
 * @throws Error with message 'Timed out while trying to scroll element into
 *   view' when the budget is exceeded.
 */
export async function scrollIntoViewIfNeeded(
  element: ElementHandle,
  timeout = 2500,
): Promise<void> {
  const startTime = Date.now();

  // eslint-disable-next-line no-constant-condition
  while (true) {
    const isVisible = await element.evaluate((el) => {
      const rect = el.getBoundingClientRect();

      // Nothing is rendered yet: keep waiting without scrolling.
      if (rect.width === 0 || rect.height === 0) return false;

      // Hidden through CSS: keep waiting without scrolling.
      const style = window.getComputedStyle(el);
      if (
        style.visibility === 'hidden' ||
        style.display === 'none' ||
        style.opacity === '0'
      ) {
        return false;
      }

      const viewportHeight =
        window.innerHeight || document.documentElement.clientHeight;
      const viewportWidth =
        window.innerWidth || document.documentElement.clientWidth;

      // Declared inside the callback because the callback is evaluated in the
      // page and cannot reference names from the surrounding module.
      const axisInView = (start: number, end: number, limit: number): boolean =>
        end - start > limit
          ? // Oversize: cannot fit, so require it to span the midpoint.
            start <= limit / 2 && end >= limit / 2
          : // Fits: require it to be fully inside the viewport.
            start >= 0 && end <= limit;

      const isInViewport =
        axisInView(rect.top, rect.bottom, viewportHeight) &&
        axisInView(rect.left, rect.right, viewportWidth);

      if (!isInViewport) {
        el.scrollIntoView({
          behavior: 'auto',
          block: 'center',
          inline: 'center',
        });
        return false;
      }

      return true;
    });

    if (isVisible) break;

    if (Date.now() - startTime > timeout) {
      throw new Error('Timed out while trying to scroll element into view');
    }

    // Small delay before the next poll.
    await new Promise((resolve) => setTimeout(resolve, 100));
  }
}
71+
72+
/**
 * Resolves a DOM tree node to a live Puppeteer element handle.
 *
 * The node's ancestors are walked to collect every `iframe` between the page
 * and the node. Those frames are entered from outermost to innermost using
 * each iframe's enhanced CSS selector, and the node's own selector is then
 * queried inside the innermost frame. A located element is scrolled into view
 * before it is returned.
 *
 * Lookup is best effort: a missing page, a missing iframe, an inaccessible
 * frame, an unmatched selector, or any error raised while querying or
 * scrolling is reported on the console and yields `null`.
 *
 * @param page - Puppeteer page to search. A falsy value is tolerated and
 *   yields `null`.
 * @param element - DOM tree node to locate.
 * @param _options - Overrides merged over `DEFAULT_BROWSER_CONTEXT_CONFIG`.
 *   Only `includeDynamicAttributes` is read here.
 * @returns The element handle (owned by the caller), or `null` if it could not
 *   be located.
 */
export async function locateElement(
  page: PuppeteerPage,
  element: DOMElementNode,
  _options?: Partial<BrowserContextConfig>,
): Promise<ElementHandle | null> {
  if (!page) {
    // throw new Error('Puppeteer page is not connected');
    console.warn('Puppeteer is not connected');
    return null;
  }

  const options = {
    ...DEFAULT_BROWSER_CONTEXT_CONFIG,
    ..._options,
  };

  // Collect iframe ancestors ordered from outermost to innermost.
  const iframes: DOMElementNode[] = [];
  for (let node = element.parent; node; node = node.parent) {
    if (node.tagName === 'iframe') iframes.unshift(node);
  }

  let currentFrame: PuppeteerPage | Frame = page;

  try {
    for (const iframe of iframes) {
      const cssSelector = iframe.enhancedCssSelectorForElement(
        options.includeDynamicAttributes,
      );
      const frameElement: ElementHandle | null =
        await currentFrame.$(cssSelector);
      if (!frameElement) {
        console.warn(`Could not find iframe with selector: ${cssSelector}`);
        return null;
      }

      let frame: Frame | null = null;
      try {
        frame = await frameElement.contentFrame();
      } finally {
        // The handle is only needed to reach the frame; release it so it does
        // not accumulate in the browser. Cleanup failures are not actionable.
        await frameElement.dispose().catch(() => undefined);
      }

      if (!frame) {
        console.warn(
          `Could not access frame content for selector: ${cssSelector}`,
        );
        return null;
      }
      currentFrame = frame;
    }

    const cssSelector = element.enhancedCssSelectorForElement(
      options.includeDynamicAttributes,
    );
    const elementHandle: ElementHandle | null =
      await currentFrame.$(cssSelector);
    if (!elementHandle) {
      return null;
    }

    try {
      // Scroll element into view if needed
      await scrollIntoViewIfNeeded(elementHandle);
    } catch (error) {
      // The handle is not returned on failure, so release it here.
      await elementHandle.dispose().catch(() => undefined);
      throw error;
    }
    return elementHandle;
  } catch (error) {
    console.error('Failed to locate element:', error);
    return null;
  }
}
11138

12139
export async function waitForStableNetwork(
13140
page: PuppeteerPage | null,

0 commit comments

Comments
 (0)