Skip to content

feat(ui-tars): format error status and messages #534

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions apps/ui-tars/src/main/services/runAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export const runAgent = async (
}) => {
const lastConv = getState().messages[getState().messages.length - 1];
const { status, conversations, ...restUserData } = data;
logger.info('[status]', status, conversations.length);
logger.info('[onGUIAgentData] status', status, conversations.length);

// add SoM to conversations
const conversationsWithSoM: ConversationWithSoM[] = await Promise.all(
Expand Down Expand Up @@ -102,7 +102,7 @@ export const runAgent = async (
...rest
} = conversationsWithSoM?.[conversationsWithSoM.length - 1] || {};
logger.info(
'======data======\n',
'[onGUIAgentData] ======data======\n',
predictionParsed,
screenshotContext,
rest,
Expand Down Expand Up @@ -165,8 +165,18 @@ export const runAgent = async (
signal: abortController?.signal,
operator: operator,
onData: handleData,
onError: ({ error }) => {
logger.error('[runAgent error]', settings, error);
onError: (params) => {
const { error } = params;
logger.error('[onGUIAgentError]', settings, error);
setState({
...getState(),
status: StatusEnum.ERROR,
errorMsg: JSON.stringify({
status: error.status,
message: error.message,
stack: error.stack,
}),
});
},
retry: {
model: {
Expand Down
32 changes: 30 additions & 2 deletions apps/ui-tars/src/renderer/src/components/RunMessages/Messages.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
import { AlertCircle, Camera, Loader2 } from 'lucide-react';
import { Button } from '@renderer/components/ui/button';
import { GUIAgentError, ErrorStatusEnum } from '@ui-tars/shared/types';

export const HumanTextMessage = ({ text }: { text: string }) => {
return (
Expand All @@ -27,13 +28,40 @@ export const ScreenshotMessage = ({ onClick }: ScreenshotMessageProps) => {
};

/**
 * Renders the error banner shown when an agent run fails.
 *
 * `text` is either a plain error string or a JSON string describing a
 * `GUIAgentError` (an object carrying at least a `status` key, optionally
 * `message` and `stack`). When the JSON form is detected, the banner title
 * shows the `ErrorStatusEnum` name for that status and the body shows the
 * message plus the stack trace; otherwise the raw text is shown under a
 * generic "Error" title.
 */
export const ErrorMessage = ({ text }: { text: string }) => {
  let parsedError: GUIAgentError | null = null;
  try {
    const parsed = JSON.parse(text);
    if (parsed && typeof parsed === 'object' && 'status' in parsed) {
      parsedError = parsed as GUIAgentError;
    }
  } catch {
    // Not JSON: fall through and render `text` verbatim below.
  }

  return (
    <div className="flex flex-col gap-2 my-4 p-4 bg-red-500/10 border border-red-500/20 rounded-lg">
      <div className="flex items-center gap-2">
        <AlertCircle className="w-5 h-5 text-red-500 shrink-0" />
        {/* Single title: status name for structured errors, generic otherwise. */}
        <span className="font-medium text-red-500">
          {parsedError
            ? ErrorStatusEnum[parsedError.status] || 'UNKNOWN_ERROR'
            : 'Error'}
        </span>
      </div>
      {parsedError ? (
        <div className="flex flex-col gap-1">
          <div className="text-sm text-red-500/90 font-medium break-all">
            {parsedError.message}
          </div>
          {parsedError.stack && (
            // pre-wrap keeps the line breaks of the stack trace readable.
            <div className="text-xs text-red-500/70 font-mono mt-2 whitespace-pre-wrap break-all">
              {parsedError.stack}
            </div>
          )}
        </div>
      ) : (
        <div className="text-sm text-red-500/90 break-all">{text}</div>
      )}
    </div>
  );
};
Expand Down
182 changes: 145 additions & 37 deletions packages/ui-tars/sdk/src/GUIAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
StatusEnum,
ShareVersion,
ErrorStatusEnum,
GUIAgentError,
} from '@ui-tars/shared/types';
import { IMAGE_PLACEHOLDER, MAX_LOOP_COUNT } from '@ui-tars/shared/constants';
import { sleep } from '@ui-tars/shared/utils';
Expand All @@ -31,6 +32,7 @@ import {
SYSTEM_PROMPT,
SYSTEM_PROMPT_TEMPLATE,
} from './constants';
import { InternalServerError } from 'openai';

export class GUIAgent<T extends Operator> extends BaseGUIAgent<
GUIAgentConfig<T>
Expand Down Expand Up @@ -145,17 +147,24 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
break;
}

if (loopCnt >= maxLoopCount || snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT) {
if (loopCnt >= maxLoopCount) {
Object.assign(data, {
status:
loopCnt >= maxLoopCount ? StatusEnum.MAX_LOOP : StatusEnum.ERROR,
...(snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT && {
error: {
code: ErrorStatusEnum.SCREENSHOT_ERROR,
error: 'Too many screenshot failures',
stack: 'null',
},
}),
status: StatusEnum.ERROR,
error: this.guiAgentErrorParser(
null,
ErrorStatusEnum.REACH_MAXLOOP_ERROR,
),
});
break;
}

if (snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT) {
Object.assign(data, {
status: StatusEnum.ERROR,
error: this.guiAgentErrorParser(
null,
ErrorStatusEnum.SCREENSHOT_RETRY_ERROR,
),
});
break;
}
Expand Down Expand Up @@ -250,7 +259,19 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
parsedPredictions: [],
};
}
throw error;

Object.assign(data, {
status: StatusEnum.ERROR,
error: this.guiAgentErrorParser(
error,
ErrorStatusEnum.INVOKE_RETRY_ERROR,
),
});

return {
prediction: '',
parsedPredictions: [],
};
}
},
{
Expand All @@ -259,14 +280,14 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
},
);

logger.info('[GUIAgent Response]:', prediction);
logger.info('[GUIAgent] Response:', prediction);
logger.info(
'GUIAgent Parsed Predictions:',
'[GUIAgent] Parsed Predictions:',
JSON.stringify(parsedPredictions),
);

if (!prediction) {
logger.error('[GUIAgent Response Empty]:', prediction);
logger.error('[GUIAgent] Response Empty:', prediction);
continue;
}

Expand Down Expand Up @@ -301,27 +322,32 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
for (const parsedPrediction of parsedPredictions) {
const actionType = parsedPrediction.action_type;

logger.info('GUIAgent Action:', actionType);
logger.info('[GUIAgent] Action:', actionType);

// handle internal action spaces
if (actionType === INTERNAL_ACTION_SPACES_ENUM.ERROR_ENV) {
Object.assign(data, {
status: StatusEnum.ERROR,
error: {
code: ErrorStatusEnum.ENVIRONMENT_ERROR,
error: 'The environment error occurred when parsing the action',
stack: 'null',
},
error: this.guiAgentErrorParser(
null,
ErrorStatusEnum.ENVIRONMENT_ERROR,
),
});
break;
} else if (actionType === INTERNAL_ACTION_SPACES_ENUM.MAX_LOOP) {
data.status = StatusEnum.MAX_LOOP;
Object.assign(data, {
status: StatusEnum.ERROR,
error: this.guiAgentErrorParser(
null,
ErrorStatusEnum.REACH_MAXLOOP_ERROR,
),
});
break;
}

if (!signal?.aborted && !this.isStopped) {
logger.info(
'GUIAgent Action Inputs:',
'[GUIAgent] Action Inputs:',
parsedPrediction.action_inputs,
parsedPrediction.action_type,
);
Expand All @@ -341,7 +367,14 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
onRetry: retry?.execute?.onRetry,
},
).catch((e) => {
logger.error('GUIAgent execute error', e);
logger.error('[GUIAgent] execute error', e);
Object.assign(data, {
status: StatusEnum.ERROR,
error: this.guiAgentErrorParser(
e,
ErrorStatusEnum.EXECUTE_RETRY_ERROR,
),
});
});

if (executeOutput && executeOutput?.status) {
Expand Down Expand Up @@ -370,24 +403,25 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
}
}
} catch (error) {
logger.error('[GUIAgent] Catch error', error);
if (
error instanceof Error &&
(error.name === 'AbortError' || error.message?.includes('aborted'))
) {
logger.info('Request was aborted');
logger.info('[GUIAgent] Catch: request was aborted');
data.status = StatusEnum.USER_STOPPED;
return;
}

logger.error('[GUIAgent] run error', error);
data.status = StatusEnum.ERROR;
data.error = {
code: ErrorStatusEnum.EXECUTE_ERROR,
error: 'GUIAgent Service Error',
stack: `${error}`,
};
throw error;
data.error = this.guiAgentErrorParser(error);

// We only use OnError callback to dispatch error information to caller,
// and we will not throw error to the caller.
// throw error;
} finally {
logger.info('[GUIAgent] Finally: status', data.status);

if (data.status === StatusEnum.USER_STOPPED) {
await operator.execute({
prediction: '',
Expand All @@ -403,18 +437,20 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
factors: [0, 0],
});
}

await onData?.({ data: { ...data, conversations: [] } });

if (data.status === StatusEnum.ERROR) {
onError?.({
data,
error: data.error || {
code: ErrorStatusEnum.UNKNOWN_ERROR,
error: 'Unkown error occurred',
stack: 'null',
},
error:
data.error ||
new GUIAgentError(
ErrorStatusEnum.UNKNOWN_ERROR,
'Unknown error occurred',
),
});
}
logger.info('[GUIAgent] finally: status', data.status);
}
}

Expand Down Expand Up @@ -449,4 +485,76 @@ export class GUIAgent<T extends Operator> extends BaseGUIAgent<
actionSpaces.join('\n'),
);
}

/**
 * Normalizes a failure into a `GUIAgentError`.
 *
 * Precedence:
 * 1. An `InternalServerError` (from the `openai` package) always becomes
 *    `MODEL_SERVICE_ERROR`, keeping the original message and stack.
 * 2. Otherwise a recognized `type` selects a fixed, human-readable message.
 * 3. Otherwise the result is `UNKNOWN_ERROR`, carrying the message and stack
 *    of `error` when it is an `Error`.
 *
 * @param error - The caught value, or `null` when the failure is a detected
 *   condition rather than an exception.
 * @param type - Optional category hint for the failure.
 * @returns The normalized error; never throws on engines that lack
 *   `Error.captureStackTrace`.
 */
private guiAgentErrorParser(
  error: unknown,
  type: ErrorStatusEnum | null = null,
): GUIAgentError {
  this.logger.error('[GUIAgent] guiAgentErrorParser:', error);

  let parseError: GUIAgentError;

  if (error instanceof InternalServerError) {
    // Model-service failures win over whatever category the caller passed.
    this.logger.error(
      '[GUIAgent] guiAgentErrorParser instanceof InternalServerError.',
    );
    parseError = new GUIAgentError(
      ErrorStatusEnum.MODEL_SERVICE_ERROR,
      error.message,
      error.stack,
    );
  } else {
    switch (type) {
      case ErrorStatusEnum.REACH_MAXLOOP_ERROR:
        parseError = new GUIAgentError(
          ErrorStatusEnum.REACH_MAXLOOP_ERROR,
          'Has reached max loop count',
        );
        break;
      case ErrorStatusEnum.SCREENSHOT_RETRY_ERROR:
        parseError = new GUIAgentError(
          ErrorStatusEnum.SCREENSHOT_RETRY_ERROR,
          'Too many screenshot failures',
        );
        break;
      case ErrorStatusEnum.INVOKE_RETRY_ERROR:
        // The literal 'null' string is passed as the stack argument here.
        parseError = new GUIAgentError(
          ErrorStatusEnum.INVOKE_RETRY_ERROR,
          'Too many model invoke failures',
          'null',
        );
        break;
      case ErrorStatusEnum.EXECUTE_RETRY_ERROR:
        parseError = new GUIAgentError(
          ErrorStatusEnum.EXECUTE_RETRY_ERROR,
          'Too many action execute failures',
          'null',
        );
        break;
      case ErrorStatusEnum.ENVIRONMENT_ERROR:
        parseError = new GUIAgentError(
          ErrorStatusEnum.ENVIRONMENT_ERROR,
          'The environment error occurred when parsing the action',
        );
        break;
      default:
        // No (or an unrecognized) category: surface whatever the thrown
        // value can tell us.
        parseError = new GUIAgentError(
          ErrorStatusEnum.UNKNOWN_ERROR,
          error instanceof Error ? error.message : 'Unknown error occurred',
          error instanceof Error ? error.stack || 'null' : 'null',
        );
        break;
    }
  }

  // `Error.captureStackTrace` is a non-standard engine extension; guard it so
  // the error-reporting path cannot itself throw where it is unavailable.
  if (!parseError.stack && typeof Error.captureStackTrace === 'function') {
    // Keep guiAgentErrorParser itself out of the captured stack trace.
    Error.captureStackTrace(parseError, this.guiAgentErrorParser);
  }

  return parseError;
}
}
Loading
Loading