Skip to content

Commit 79f29c0

Browse files
committed
fix(execute): replace tail newline when typing
1 parent 1746a07 commit 79f29c0

File tree

4 files changed: 145 additions and 28 deletions.

packages/action-parser/src/index.test.ts

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,8 @@
1+
/**
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
// @prettier
16
import { describe, expect, it } from 'vitest';
27

38
import { actionParser } from './index';
@@ -91,25 +96,19 @@ describe('actionParser', () => {
9196
});
9297
});
9398

94-
it('should remove trailing \\n in content value', () => {
99+
it('should return parsed action with newline', () => {
95100
const result = actionParser({
96-
prediction:
97-
'Thought: To proceed with the task of accessing "doubao.com," I need to type the correct URL into the address bar. Since the address bar is already active, the next logical step is to input the URL "doubao.com" to navigate to the desired website.\nType "doubao.com" into the address bar to initiate navigation to the website.\nAction: type(content=\'doubao.com\\n\')',
101+
// prettier-ignore
102+
prediction: "Thought: 我已经点击了地址栏,现在需要输入网址doubao.com。地址栏已经被激活,可以直接输入网址。\nAction: type(content='doubao.com\n')",
98103
factor: 1000,
99104
});
100105

101-
expect(result).toEqual({
102-
parsed: [
103-
{
104-
action_inputs: {
105-
content: 'doubao.com',
106-
},
107-
action_type: 'type',
108-
reflection: '',
109-
thought:
110-
'To proceed with the task of accessing "doubao.com," I need to type the correct URL into the address bar. Since the address bar is already active, the next logical step is to input the URL "doubao.com" to navigate to the desired website.\nType "doubao.com" into the address bar to initiate navigation to the website.',
111-
},
112-
],
113-
});
106+
expect(result.parsed[0].thought).toBe(
107+
'我已经点击了地址栏,现在需要输入网址doubao.com。地址栏已经被激活,可以直接输入网址。',
108+
);
109+
expect(result.parsed[0].action_type).toBe('type');
110+
expect(result.parsed[0].action_inputs.content).toEqual(
111+
String.raw`doubao.com\n`,
112+
);
114113
});
115114
});

packages/action-parser/src/index.ts

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,8 @@ function parseActionVlm(
7979
const actions: PredictionParsed[] = [];
8080

8181
for (const rawStr of allActions) {
82-
const actionInstance = parseAction(rawStr.replace(/\n/g, '\\n').trim());
82+
// prettier-ignore
83+
const actionInstance = parseAction(rawStr.replace(/\n/g, String.raw`\n`).trimStart());
8384
if (!actionInstance) {
8485
console.log(`Action can't parse: ${rawStr}`);
8586
continue;
@@ -152,16 +153,11 @@ function parseAction(actionStr: string) {
152153
if (!key) continue;
153154

154155
// Join value parts back together in case there were = signs in the value
155-
let value = valueParts
156+
const value = valueParts
156157
.join('=')
157158
.trim()
158159
.replace(/^['"]|['"]$/g, ''); // Remove surrounding quotes
159160

160-
// Remove trailing \n in content value
161-
if (key.trim() === 'content' && value.endsWith('\\n')) {
162-
value = value.slice(0, -2);
163-
}
164-
165161
//@ts-ignore
166162
kwargs[key.trim()] = value;
167163
}

src/main/agent/execute.test.ts

Lines changed: 113 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,113 @@
1+
import { Key, keyboard } from '@computer-use/nut-js';
2+
import { beforeEach, describe, expect, it, vi } from 'vitest';
3+
4+
import { ExecuteParams, execute } from './execute';
5+
6+
// Mock @computer-use/nut-js
7+
vi.mock('@computer-use/nut-js', async (importOriginal) => {
8+
const actual: any = await importOriginal();
9+
return {
10+
mouse: {
11+
move: vi.fn(),
12+
click: vi.fn(),
13+
config: {
14+
mouseSpeed: 1500,
15+
},
16+
},
17+
Key: actual.Key,
18+
keyboard: {
19+
type: vi.fn(),
20+
pressKey: vi.fn(),
21+
releaseKey: vi.fn(),
22+
config: {
23+
autoDelayMs: 0,
24+
},
25+
},
26+
Button: {
27+
LEFT: 'left',
28+
RIGHT: 'right',
29+
MIDDLE: 'middle',
30+
},
31+
Point: vi.fn(),
32+
straightTo: vi.fn((point) => point),
33+
sleep: vi.fn(),
34+
};
35+
});
36+
37+
describe('execute', () => {
38+
const mockLogger = {
39+
info: vi.fn(),
40+
warn: vi.fn(),
41+
error: vi.fn(),
42+
};
43+
44+
beforeEach(() => {
45+
vi.clearAllMocks();
46+
});
47+
48+
it('type doubao.com\n', async () => {
49+
const executeParams: ExecuteParams = {
50+
prediction: {
51+
reflection: '',
52+
thought:
53+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
54+
`Type "doubao.com" into the browser's address bar.`,
55+
action_type: 'type',
56+
action_inputs: { content: 'doubao.com\\n' },
57+
},
58+
screenWidth: 1920,
59+
screenHeight: 1080,
60+
logger: mockLogger,
61+
scaleFactor: 1,
62+
};
63+
64+
await execute(executeParams);
65+
66+
expect(keyboard.type).toHaveBeenCalledWith('doubao.com');
67+
expect(keyboard.pressKey).toHaveBeenCalledWith(Key.Enter);
68+
});
69+
70+
it('type doubao.com', async () => {
71+
const executeParams: ExecuteParams = {
72+
prediction: {
73+
reflection: '',
74+
thought:
75+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
76+
`Type "doubao.com" into the browser's address bar.`,
77+
action_type: 'type',
78+
action_inputs: { content: 'doubao.com' },
79+
},
80+
screenWidth: 1920,
81+
screenHeight: 1080,
82+
logger: mockLogger,
83+
scaleFactor: 1,
84+
};
85+
86+
await execute(executeParams);
87+
88+
expect(keyboard.type).toHaveBeenCalledWith('doubao.com');
89+
expect(keyboard.pressKey).not.toHaveBeenCalledWith(Key.Enter);
90+
});
91+
92+
it('type Hello World\nUI-TARS\n', async () => {
93+
const executeParams: ExecuteParams = {
94+
prediction: {
95+
reflection: '',
96+
thought:
97+
'To proceed with the task of accessing doubao.com, I need to type the URL into the address bar. This will allow me to navigate to the website and continue with the subsequent steps of the task.\n' +
98+
`Type "Hello World\nUI-TARS\n" into the browser's address bar.`,
99+
action_type: 'type',
100+
action_inputs: { content: 'Hello World\\nUI-TARS\\n' },
101+
},
102+
screenWidth: 1920,
103+
screenHeight: 1080,
104+
logger: mockLogger,
105+
scaleFactor: 1,
106+
};
107+
108+
await execute(executeParams);
109+
110+
expect(keyboard.type).toHaveBeenCalledWith('Hello World\\nUI-TARS');
111+
expect(keyboard.pressKey).toHaveBeenCalledWith(Key.Enter);
112+
});
113+
});

src/main/agent/execute.ts

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,13 +25,15 @@ const moveStraightTo = async (startX: number | null, startY: number | null) => {
2525
await mouse.move(straightTo(new Point(startX, startY)));
2626
};
2727

28-
export const execute = async (executeParams: {
28+
export interface ExecuteParams {
2929
scaleFactor?: number;
3030
prediction: PredictionParsed;
3131
screenWidth: number;
3232
screenHeight: number;
3333
logger?: any;
34-
}) => {
34+
}
35+
36+
export const execute = async (executeParams: ExecuteParams) => {
3537
const {
3638
prediction,
3739
screenWidth,
@@ -141,17 +143,24 @@ export const execute = async (executeParams: {
141143
const content = action_inputs.content?.trim();
142144
logger.info('[device] type', content);
143145
if (content) {
146+
const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
144147
keyboard.config.autoDelayMs = 0;
145148
if (env.isWindows) {
146149
const originalClipboard = clipboard.readText();
147-
clipboard.writeText(content);
150+
clipboard.writeText(stripContent);
148151
await keyboard.pressKey(Key.LeftControl, Key.V);
149152
await keyboard.releaseKey(Key.LeftControl, Key.V);
150-
await sleep(100);
153+
await sleep(500);
151154
clipboard.writeText(originalClipboard);
152155
} else {
153-
await keyboard.type(content);
156+
await keyboard.type(stripContent);
157+
}
158+
159+
if (content.endsWith('\n') || content.endsWith('\\n')) {
160+
await keyboard.pressKey(Key.Enter);
161+
await keyboard.releaseKey(Key.Enter);
154162
}
163+
155164
keyboard.config.autoDelayMs = 500;
156165
}
157166
break;

0 commit comments

Comments
 (0)