diff --git a/packages/ui-tars/action-parser/src/actionParser.ts b/packages/ui-tars/action-parser/src/actionParser.ts index 2d897d0ac..1f90007f8 100644 --- a/packages/ui-tars/action-parser/src/actionParser.ts +++ b/packages/ui-tars/action-parser/src/actionParser.ts @@ -203,19 +203,24 @@ function parseAction(actionStr: string) { const kwargs = {}; if (argsStr.trim()) { - // Split on commas that aren't inside quotes or parentheses - const argPairs = argsStr.match(/([^,']|'[^']*')+/g) || []; + const argPairs = + argsStr.match(/([^,']|'[^']*'|'<bbox>.*?<\/bbox>')+/g) || []; for (const pair of argPairs) { const [key, ...valueParts] = pair.split('='); if (!key) continue; - // Join value parts back together in case there were = signs in the value - const value = valueParts + let value = valueParts .join('=') .trim() .replace(/^['"]|['"]$/g, ''); // Remove surrounding quotes + // 处理 bbox 格式 + if (value.includes('<bbox>')) { + value = value.replace(/<bbox>|<\/bbox>/g, '').replace(/\s+/g, ','); + value = `(${value})`; + } + //@ts-ignore kwargs[key.trim()] = value; } diff --git a/packages/ui-tars/action-parser/test/actionParser.test.ts b/packages/ui-tars/action-parser/test/actionParser.test.ts index 8a0203180..1df355a5c 100644 --- a/packages/ui-tars/action-parser/test/actionParser.test.ts +++ b/packages/ui-tars/action-parser/test/actionParser.test.ts @@ -140,6 +140,93 @@ Action: click(start_box='(100,200)') }); }); + // M8 mode tests + describe('M8 mode', () => { + it('should correctly parse M8 format input', () => { + const input = `Thought: 我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。 +Action: click(start_box='<bbox>637 964 637 964</bbox>')`; + + const result = parseActionVlm(input, [1000, 1000], 'bc'); + + expect(result).toEqual([ + { + reflection: null, + thought: + '我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。', + action_type: 'click', + action_inputs: { + start_box: '[0.637,0.964,0.637,0.964]', + }, + }, + ]); + }); + + it('should correctly parse 
M8 format input', () => { + const input = `Thought: 我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。 +Action: click(start_box='<bbox>637 964 637 964</bbox>')`; + + const result = parseActionVlm(input, [1000, 1000], 'bc', { + width: 2560, + height: 1440, + }); + + expect(result).toEqual([ + { + reflection: null, + thought: + '我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。', + action_type: 'click', + action_inputs: { + start_box: '[0.637,0.964,0.637,0.964]', + start_coords: [1630.72, 1388.16], + }, + }, + ]); + }); + + it('should correctly parse M8 format input', () => { + const input = `Thought: 我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。 +Action: click(start_box='[637,964,637,964]')`; + + const result = parseActionVlm(input, [1000, 1000], 'bc'); + + expect(result).toEqual([ + { + reflection: null, + thought: + '我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。', + action_type: 'click', + action_inputs: { + start_box: '[0.637,0.964,0.637,0.964]', + }, + }, + ]); + }); + + it('should correctly parse M8 format input', () => { + const input = `Thought: 我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。 +Action: click(start_box='[637,964,637,964]')`; + + const result = parseActionVlm(input, [1000, 1000], 'bc', { + width: 2560, + height: 1440, + }); + + expect(result).toEqual([ + { + reflection: null, + thought: + '我看到当前屏幕显示的是一个电子表格软件和一个聊天窗口,而任务要求我需要在浏览器中搜索北京明天天气。我需要先点击任务栏上的浏览器图标来启动浏览器。', + action_type: 'click', + action_inputs: { + start_box: '[0.637,0.964,0.637,0.964]', + start_coords: [1630.72, 1388.16], + }, + }, + ]); + }); + }); + describe('Box coordinates normalization', () => { it('should correctly normalize box with four coordinates', () => { const input = `Thought: I need to click on this element diff --git a/packages/ui-tars/action-parser/vitest.config.mts b/packages/ui-tars/action-parser/vitest.config.mts index 1b73d9704..a3d4e1b12 100644 --- 
a/packages/ui-tars/action-parser/vitest.config.mts +++ b/packages/ui-tars/action-parser/vitest.config.mts @@ -14,7 +14,7 @@ export default defineProject({ root: './', test: { globals: true, - setupFiles: [resolve(__dirname, '../../scripts/vitest-setup.ts')], + setupFiles: [resolve(__dirname, '../../../scripts/vitest-setup.ts')], environment: 'node', includeSource: [resolve(__dirname, '.')], },