Skip to content

Commit 68264fe

Browse files
authored
fix(agent): abort not break immediately (#49)
* chore: call_user action
* fix(agent): abort not break immediately
* fix(ui): disable logo dragged
1 parent 71ab50c commit 68264fe

File tree

7 files changed

+51
-22
lines changed

7 files changed

+51
-22
lines changed

packages/shared/package.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
"typesVersions": {
1818
"*": {
1919
"types": [
20-
"./dist/types/index.d.ts"
20+
"./src/types/index.ts"
2121
],
2222
"*": [
23-
"./dist/*"
23+
"./src/*"
2424
]
2525
}
2626
},
@@ -38,7 +38,8 @@
3838
"registry": "https://registry.npmjs.org/"
3939
},
4040
"files": [
41-
"dist"
41+
"dist",
42+
"src"
4243
],
4344
"dependencies": {},
4445
"devDependencies": {

src/main/agent/execute.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ export const execute = async (executeParams: ExecuteParams) => {
245245
break;
246246
}
247247

248-
case 'screenshot':
248+
case 'call_user':
249249
case 'finished':
250250
break;
251251

src/main/agent/index.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,9 @@ export class ComputerUseAgent {
226226
logger.info('[vlmParams_conversations]:', vlmParams.conversations);
227227
logger.info('[vlmParams_images_len]:', vlmParams.images.length);
228228

229-
const vlmRes = await vlm.invoke(vlmParams);
229+
const vlmRes = await vlm.invoke(vlmParams, {
230+
abortController,
231+
});
230232

231233
if (!vlmRes?.prediction) {
232234
continue;

src/main/agent/llm/base.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ export interface VlmRequest {
99
images: string[];
1010
}
1111

12+
export interface VlmRequestOptions {
13+
abortController?: AbortController | null;
14+
}
15+
1216
export interface VlmResponse {
1317
prediction: string;
1418
reflections?: string[];
@@ -32,5 +36,8 @@ export abstract class VLM<
3236
K extends VlmResponse = VlmResponse,
3337
> {
3438
abstract get vlmModel(): string;
35-
abstract invoke({ conversations, images }: T): Promise<K>;
39+
abstract invoke(
40+
{ conversations, images }: T,
41+
options?: VlmRequestOptions,
42+
): Promise<K>;
3643
}

src/main/agent/llm/ui-tars.ts

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import { store } from '@main/store/create';
1010
import { preprocessResizeImage } from '@main/utils/image';
1111

1212
import { MAX_PIXELS } from '../constant';
13-
import { VLM, VlmRequest, VlmResponse } from './base';
13+
import { VLM, VlmRequest, VlmRequestOptions, VlmResponse } from './base';
1414

1515
export interface UITARSOptions {
1616
reflection: boolean;
@@ -31,7 +31,11 @@ export class UITARS implements VLM<VlmRequest, VlmResponse> {
3131

3232
// [image, prompt]
3333
// [gpt, image]
34-
async invoke({ conversations, images }: VlmRequest) {
34+
async invoke(
35+
{ conversations, images }: VlmRequest,
36+
options?: VlmRequestOptions,
37+
) {
38+
const { abortController } = options ?? {};
3539
const compressedImages = await Promise.all(
3640
images.map((image) => preprocessResizeImage(image, MAX_PIXELS)),
3741
);
@@ -51,19 +55,24 @@ export class UITARS implements VLM<VlmRequest, VlmResponse> {
5155

5256
const startTime = Date.now();
5357
const result = await openai.chat.completions
54-
.create({
55-
model: this.vlmModel,
56-
max_tokens: 1000,
57-
stream: false,
58-
temperature: 0,
59-
top_p: 0.7,
60-
seed: null,
61-
stop: null,
62-
frequency_penalty: null,
63-
presence_penalty: null,
64-
// messages
65-
messages,
66-
})
58+
.create(
59+
{
60+
model: this.vlmModel,
61+
max_tokens: 1000,
62+
stream: false,
63+
temperature: 0,
64+
top_p: 0.7,
65+
seed: null,
66+
stop: null,
67+
frequency_penalty: null,
68+
presence_penalty: null,
69+
// messages
70+
messages,
71+
},
72+
{
73+
signal: abortController?.signal,
74+
},
75+
)
6776
.finally(() => {
6877
logger.info(`[vlm_invoke_time_cost]: ${Date.now() - startTime}ms`);
6978
});

src/main/store/create.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ export const store = createStore<AppState>(
8181
},
8282

8383
RUN_AGENT: async () => {
84+
if (get().thinking) {
85+
return;
86+
}
87+
8488
set({ abortController: new AbortController(), thinking: true });
8589

8690
await runAgent(set, get);
@@ -91,6 +95,7 @@ export const store = createStore<AppState>(
9195
set({ status: StatusEnum.END, thinking: false });
9296
showWindow();
9397
get().abortController?.abort();
98+
9499
closeScreenMarker();
95100
},
96101
SET_INSTRUCTIONS: (instructions) => {

src/renderer/src/components/Header/index.tsx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,12 @@ export default function Header({ className }: { className?: string }) {
1515
<Box position="relative" textAlign="center" className={className}>
1616
<Flex alignItems="center" justifyContent="center">
1717
<HStack>
18-
<Image alt="UI-TARS Logo" src={logoVector} h="40px" />
18+
<Image
19+
alt="UI-TARS Logo"
20+
src={logoVector}
21+
h="40px"
22+
draggable={false}
23+
/>
1924
</HStack>
2025
<Box position="absolute" right="4">
2126
<IconButton

0 commit comments

Comments
 (0)