Skip to content

Commit 0e560aa

Browse files
authored
fix(execute): drag not work (#26)
1 parent da75e07 commit 0e560aa

File tree

4 files changed

+145
-4
lines changed

4 files changed

+145
-4
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
"@ui-tars/action-parser": "workspace:*",
4747
"@ui-tars/shared": "workspace:*",
4848
"async-retry": "^1.3.3",
49+
"big.js": "^6.2.2",
4950
"dotenv": "^16.4.7",
5051
"electron-debug": "^3.2.0",
5152
"electron-devtools-installer": "^3.2.0",

pnpm-lock.yaml

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/main/agent/execute.test.ts

Lines changed: 117 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
import { Key, keyboard } from '@computer-use/nut-js';
1+
import {
2+
Button,
3+
Key,
4+
Point,
5+
Region,
6+
centerOf,
7+
keyboard,
8+
mouse,
9+
straightTo,
10+
} from '@computer-use/nut-js';
211
import { beforeEach, describe, expect, it, vi } from 'vitest';
312

413
import { ExecuteParams, execute } from './execute';
@@ -13,6 +22,7 @@ vi.mock('@computer-use/nut-js', async (importOriginal) => {
1322
config: {
1423
mouseSpeed: 1500,
1524
},
25+
drag: vi.fn(),
1626
},
1727
Key: actual.Key,
1828
keyboard: {
@@ -28,8 +38,11 @@ vi.mock('@computer-use/nut-js', async (importOriginal) => {
2838
RIGHT: 'right',
2939
MIDDLE: 'middle',
3040
},
31-
Point: vi.fn(),
41+
Point: actual.Point,
42+
Region: actual.Region,
3243
straightTo: vi.fn((point) => point),
44+
centerOf: vi.fn((region) => region),
45+
randomPointIn: vi.fn((region) => region),
3346
sleep: vi.fn(),
3447
};
3548
});
@@ -45,6 +58,56 @@ describe('execute', () => {
4558
vi.clearAllMocks();
4659
});
4760

61+
// Test: a 'click' prediction on a 1920x1080 screen with scaleFactor 1 moves the
// mouse to the predicted point and then issues a left click.
it('Click on the search bar at the top of the screen', async () => {
62+
const executeParams: ExecuteParams = {
63+
prediction: {
64+
reflection: '',
65+
thought: 'Click on the search bar at the top of the screen\n',
66+
action_type: 'click',
67+
action_inputs: {
68+
start_box: '[0.072,0.646,0.072,0.646]',
69+
},
70+
},
71+
screenWidth: 1920,
72+
screenHeight: 1080,
73+
logger: mockLogger,
74+
scaleFactor: 1,
75+
};
76+
77+
await execute(executeParams);
78+
79+
// Expected point matches the start_box fractions scaled by the screen size:
// 0.072 * 1920 = 138.24 and 0.646 * 1080 = 697.68.
// straightTo is mocked in this file to return its argument, so the expected
// call argument is the Point itself.
expect(mouse.move).toHaveBeenCalledWith(
80+
straightTo(new Point(138.24, 697.68)),
81+
);
82+
83+
expect(mouse.click).toHaveBeenCalledWith(Button.LEFT);
84+
});
85+
86+
// Test: same 'click' prediction as the scaleFactor 1 case, but with
// scaleFactor 1.5; the mouse is expected to move to the scaled point and left-click.
it('Click on the search bar at the top of the screen with scaleFactor', async () => {
87+
const executeParams: ExecuteParams = {
88+
prediction: {
89+
reflection: '',
90+
thought: 'Click on the search bar at the top of the screen\n',
91+
action_type: 'click',
92+
action_inputs: {
93+
start_box: '[0.072,0.646,0.072,0.646]',
94+
},
95+
},
96+
screenWidth: 1920,
97+
screenHeight: 1080,
98+
logger: mockLogger,
99+
scaleFactor: 1.5,
100+
};
101+
102+
await execute(executeParams);
103+
104+
// Expected point equals the unscaled coordinates multiplied by 1.5:
// 138.24 * 1.5 = 207.36 and 697.68 * 1.5 = 1046.52.
// NOTE(review): presumably execute multiplies by scaleFactor internally —
// the scaling code is not visible here; confirm against execute.ts.
expect(mouse.move).toHaveBeenCalledWith(
105+
straightTo(new Point(207.36, 1046.52)),
106+
);
107+
108+
expect(mouse.click).toHaveBeenCalledWith(Button.LEFT);
109+
});
110+
48111
it('type doubao.com\n', async () => {
49112
const executeParams: ExecuteParams = {
50113
prediction: {
@@ -110,4 +173,56 @@ describe('execute', () => {
110173
expect(keyboard.type).toHaveBeenCalledWith('Hello World\\nUI-TARS');
111174
expect(keyboard.pressKey).toHaveBeenCalledWith(Key.Enter);
112175
});
176+
177+
// Test: a 'drag' prediction whose end_box lies to the right of start_box
// results in mouse.drag being called with a Region built from the start point
// and the x/y differences between end and start.
it('drag slider horizontally', async () => {
178+
const executeParams: ExecuteParams = {
179+
prediction: {
180+
reflection: '',
181+
thought:
182+
'To narrow down the search results to cat litters within the specified price range of $18 to $32, I need to adjust the price filter. The next logical step is to drag the left handle of the price slider to set the minimum price to $18, ensuring that only products within the desired range are displayed.\n' +
183+
'Drag the left handle of the price slider to set the minimum price to $18.',
184+
action_type: 'drag',
185+
action_inputs: {
186+
start_box: '[0.072,0.646,0.072,0.646]',
187+
end_box: '[0.175,0.647,0.175,0.647]',
188+
},
189+
},
190+
screenWidth: 1920,
191+
screenHeight: 1080,
192+
logger: mockLogger,
193+
scaleFactor: 1,
194+
};
195+
196+
await execute(executeParams);
197+
198+
// Region arguments: start (138.24, 697.68) = (0.072 * 1920, 0.646 * 1080);
// differences 197.76 = 0.175 * 1920 - 138.24 and 1.08 = 0.647 * 1080 - 697.68.
// straightTo and centerOf are both mocked in this file to return their
// argument, so this assertion compares against the Region itself.
// NOTE(review): with the real centerOf the target would presumably be the
// center of this region (midway between start and end), not the end point —
// confirm that is the intended drag destination.
expect(mouse.drag).toHaveBeenCalledWith(
199+
straightTo(centerOf(new Region(138.24, 697.68, 197.76, 1.08))),
200+
);
201+
});
202+
203+
// Test: a 'drag' prediction whose end_box lies directly above start_box
// (same x, smaller y) results in mouse.drag being called with a Region that
// has zero x difference and a negative y difference.
it('drag slider vertically', async () => {
204+
const executeParams: ExecuteParams = {
205+
prediction: {
206+
reflection: '',
207+
thought:
208+
'To narrow down the search results to cat litters within the specified price range of $18 to $32, I need to adjust the price filter. The next logical step is to drag the left handle of the price slider to set the minimum price to $18, ensuring that only products within the desired range are displayed.\n' +
209+
'Drag the left handle of the price slider to set the minimum price to $18.',
210+
action_type: 'drag',
211+
action_inputs: {
212+
start_box: '[0.072,0.646,0.072,0.646]',
213+
end_box: '[0.072,0.546,0.072,0.546]',
214+
},
215+
},
216+
screenWidth: 1920,
217+
screenHeight: 1080,
218+
logger: mockLogger,
219+
scaleFactor: 1,
220+
};
221+
222+
await execute(executeParams);
223+
224+
// Region arguments: start (138.24, 697.68); x difference 0 because both boxes
// share x = 0.072; y difference -108 = 0.546 * 1080 - 0.646 * 1080.
// straightTo and centerOf are mocked in this file to return their argument,
// so the Region is compared directly.
// NOTE(review): the region height is negative here — verify the real
// Region/centerOf handle negative dimensions as expected.
expect(mouse.drag).toHaveBeenCalledWith(
225+
straightTo(centerOf(new Region(138.24, 697.68, 0, -108))),
226+
);
227+
});
113228
});

src/main/agent/execute.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ import {
66
Button,
77
Key,
88
Point,
9+
Region,
10+
centerOf,
911
keyboard,
1012
mouse,
1113
sleep,
1214
straightTo,
1315
} from '@computer-use/nut-js';
16+
import Big from 'big.js';
1417
import { clipboard } from 'electron';
1518

1619
import { PredictionParsed } from '@ui-tars/shared/types';
@@ -133,7 +136,13 @@ export const execute = async (executeParams: ExecuteParams) => {
133136
);
134137

135138
if (startX && startY && endX && endY) {
136-
await mouse.drag([new Point(startX, startY), new Point(endX, endY)]);
139+
// calculate x and y direction difference
140+
const diffX = Big(endX).minus(startX).toNumber();
141+
const diffY = Big(endY).minus(startY).toNumber();
142+
143+
await mouse.drag(
144+
straightTo(centerOf(new Region(startX, startY, diffX, diffY))),
145+
);
137146
}
138147
}
139148
break;

0 commit comments

Comments
 (0)