Skip to content

Commit 5bb3072

Browse files
ulivz and ycjcl868 authored
fix(execute): type workaround for unexpected newline in content value (#6)
* feat(action-parser): workaround for unexpected newline in content value
* fix(execute): replace tail newline when typing
* feat(execute): page up and page down
---------
Co-authored-by: jinxin001 <jinxin001@bytedance.com>
1 parent 435ddbe commit 5bb3072

File tree

5 files changed

+166
-9
lines changed

5 files changed

+166
-9
lines changed

packages/action-parser/src/index.test.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
/**
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
// @prettier
16
import { describe, expect, it } from 'vitest';
27

38
import { actionParser } from './index';
@@ -90,4 +95,20 @@ describe('actionParser', () => {
9095
],
9196
});
9297
});
98+
99+
it('should return parsed action with newline', () => {
100+
const result = actionParser({
101+
// prettier-ignore
102+
prediction: "Thought: 我已经点击了地址栏,现在需要输入网址doubao.com。地址栏已经被激活,可以直接输入网址。\nAction: type(content='doubao.com\n')",
103+
factor: 1000,
104+
});
105+
106+
expect(result.parsed[0].thought).toBe(
107+
'我已经点击了地址栏,现在需要输入网址doubao.com。地址栏已经被激活,可以直接输入网址。',
108+
);
109+
expect(result.parsed[0].action_type).toBe('type');
110+
expect(result.parsed[0].action_inputs.content).toEqual(
111+
String.raw`doubao.com\n`,
112+
);
113+
});
93114
});

packages/action-parser/src/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ function parseActionVlm(
7979
const actions: PredictionParsed[] = [];
8080

8181
for (const rawStr of allActions) {
82-
const actionInstance = parseAction(rawStr.replace(/\n/g, '\\n').trim());
82+
// prettier-ignore
83+
const actionInstance = parseAction(rawStr.replace(/\n/g, String.raw`\n`).trimStart());
8384
if (!actionInstance) {
8485
console.log(`Action can't parse: ${rawStr}`);
8586
continue;

src/main/agent/execute.test.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { Key, keyboard } from '@computer-use/nut-js';
2+
import { beforeEach, describe, expect, it, vi } from 'vitest';
3+
4+
import { ExecuteParams, execute } from './execute';
5+
6+
// Mock @computer-use/nut-js
7+
vi.mock('@computer-use/nut-js', async (importOriginal) => {
8+
const actual: any = await importOriginal();
9+
return {
10+
mouse: {
11+
move: vi.fn(),
12+
click: vi.fn(),
13+
config: {
14+
mouseSpeed: 1500,
15+
},
16+
},
17+
Key: actual.Key,
18+
keyboard: {
19+
type: vi.fn(),
20+
pressKey: vi.fn(),
21+
releaseKey: vi.fn(),
22+
config: {
23+
autoDelayMs: 0,
24+
},
25+
},
26+
Button: {
27+
LEFT: 'left',
28+
RIGHT: 'right',
29+
MIDDLE: 'middle',
30+
},
31+
Point: vi.fn(),
32+
straightTo: vi.fn((point) => point),
33+
sleep: vi.fn(),
34+
};
35+
});
36+
37+
describe('execute', () => {
38+
const mockLogger = {
39+
info: vi.fn(),
40+
warn: vi.fn(),
41+
error: vi.fn(),
42+
};
43+
44+
beforeEach(() => {
45+
vi.clearAllMocks();
46+
});
47+
48+
it('type doubao.com\n', async () => {
49+
const executeParams: ExecuteParams = {
50+
prediction: {
51+
reflection: '',
52+
thought:
53+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
54+
`Type "doubao.com" into the browser's address bar.`,
55+
action_type: 'type',
56+
action_inputs: { content: 'doubao.com\\n' },
57+
},
58+
screenWidth: 1920,
59+
screenHeight: 1080,
60+
logger: mockLogger,
61+
scaleFactor: 1,
62+
};
63+
64+
await execute(executeParams);
65+
66+
expect(keyboard.type).toHaveBeenCalledWith('doubao.com');
67+
expect(keyboard.pressKey).toHaveBeenCalledWith(Key.Enter);
68+
});
69+
70+
it('type doubao.com', async () => {
71+
const executeParams: ExecuteParams = {
72+
prediction: {
73+
reflection: '',
74+
thought:
75+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
76+
`Type "doubao.com" into the browser's address bar.`,
77+
action_type: 'type',
78+
action_inputs: { content: 'doubao.com' },
79+
},
80+
screenWidth: 1920,
81+
screenHeight: 1080,
82+
logger: mockLogger,
83+
scaleFactor: 1,
84+
};
85+
86+
await execute(executeParams);
87+
88+
expect(keyboard.type).toHaveBeenCalledWith('doubao.com');
89+
expect(keyboard.pressKey).not.toHaveBeenCalledWith(Key.Enter);
90+
});
91+
92+
it('type Hello World\nUI-TARS\n', async () => {
93+
const executeParams: ExecuteParams = {
94+
prediction: {
95+
reflection: '',
96+
thought:
97+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
98+
`Type "Hello World\nUI-TARS\n" into the browser's address bar.`,
99+
action_type: 'type',
100+
action_inputs: { content: 'Hello World\\nUI-TARS\\n' },
101+
},
102+
screenWidth: 1920,
103+
screenHeight: 1080,
104+
logger: mockLogger,
105+
scaleFactor: 1,
106+
};
107+
108+
await execute(executeParams);
109+
110+
expect(keyboard.type).toHaveBeenCalledWith('Hello World\\nUI-TARS');
111+
expect(keyboard.pressKey).toHaveBeenCalledWith(Key.Enter);
112+
});
113+
});

src/main/agent/execute.ts

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ const moveStraightTo = async (startX: number | null, startY: number | null) => {
2525
await mouse.move(straightTo(new Point(startX, startY)));
2626
};
2727

28-
export const execute = async (executeParams: {
28+
export interface ExecuteParams {
2929
scaleFactor?: number;
3030
prediction: PredictionParsed;
3131
screenWidth: number;
3232
screenHeight: number;
3333
logger?: any;
34-
}) => {
34+
}
35+
36+
export const execute = async (executeParams: ExecuteParams) => {
3537
const {
3638
prediction,
3739
screenWidth,
@@ -141,17 +143,24 @@ export const execute = async (executeParams: {
141143
const content = action_inputs.content?.trim();
142144
logger.info('[device] type', content);
143145
if (content) {
146+
const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
144147
keyboard.config.autoDelayMs = 0;
145148
if (env.isWindows) {
146149
const originalClipboard = clipboard.readText();
147-
clipboard.writeText(content);
150+
clipboard.writeText(stripContent);
148151
await keyboard.pressKey(Key.LeftControl, Key.V);
149152
await keyboard.releaseKey(Key.LeftControl, Key.V);
150-
await sleep(100);
153+
await sleep(500);
151154
clipboard.writeText(originalClipboard);
152155
} else {
153-
await keyboard.type(content);
156+
await keyboard.type(stripContent);
157+
}
158+
159+
if (content.endsWith('\n') || content.endsWith('\\n')) {
160+
await keyboard.pressKey(Key.Enter);
161+
await keyboard.releaseKey(Key.Enter);
154162
}
163+
155164
keyboard.config.autoDelayMs = 500;
156165
}
157166
break;
@@ -167,27 +176,35 @@ export const execute = async (executeParams: {
167176
shift: Key.LeftShift,
168177
alt: Key.LeftAlt,
169178
space: Key.Space,
179+
'page down': Key.PageDown,
180+
pagedown: Key.PageDown,
181+
'page up': Key.PageUp,
182+
pageup: Key.PageUp,
170183
};
171184

172185
const keys = keyStr
173186
.split(/[\s+]/)
174187
.map((k) => keyMap[k.toLowerCase()] || Key[k as keyof typeof Key]);
175188
logger.info('[hotkey]: ', keys);
176189
await keyboard.pressKey(...keys);
190+
await keyboard.releaseKey(...keys);
177191
}
178192
break;
179193
}
180194

181195
case 'scroll': {
182196
const { direction } = action_inputs;
183-
await moveStraightTo(startX, startY);
197+
// Move the mouse to (startX, startY) only when both coordinates are non-null.
198+
if (startX !== null && startY !== null) {
199+
await moveStraightTo(startX, startY);
200+
}
184201

185202
switch (direction?.toLowerCase()) {
186203
case 'up':
187-
await mouse.scrollUp(5); // 向上滚动为正数
204+
await mouse.scrollUp(5);
188205
break;
189206
case 'down':
190-
await mouse.scrollDown(-5); // 向下滚动为负数
207+
await mouse.scrollDown(5);
191208
break;
192209
default:
193210
console.warn(`Unsupported scroll direction: ${direction}`);

src/renderer/src/components/RunMessages/index.tsx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ const RunMessages: React.FC<RunMessagesProps> = (props) => {
7272
borderColor="rgba(112, 107, 87, 0.5)"
7373
p={4}
7474
overflow="auto"
75+
css={{
76+
'&::-webkit-scrollbar': 'initial',
77+
'&::-webkit-scrollbar-track': 'initial',
78+
'&::-webkit-scrollbar-thumb': 'border-radius: 4px',
79+
}}
7580
>
7681
{Boolean(loading) && (
7782
<Center h="100%">

0 commit comments

Comments
 (0)