193 changes: 193 additions & 0 deletions src/api/providers/__tests__/glm-empty-stream.spec.ts
@@ -0,0 +1,193 @@
import { describe, it, expect, vi, beforeEach } from "vitest"
Review comment (Author):

[P3] Unused import 'beforeEach' may trip stricter lint settings; suggest removing it.

Suggested change:
- import { describe, it, expect, vi, beforeEach } from "vitest"
+ import { describe, it, expect, vi } from "vitest"

import { OpenAiHandler } from "../openai"
import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"

describe("GLM Empty Stream Handling", () => {
describe("OpenAiHandler", () => {
it("should provide fallback response for GLM models with empty streams", async () => {
const mockClient = {
chat: {
completions: {
create: vi.fn().mockImplementation(async function* () {
// Simulate empty stream - only usage, no content
yield {
choices: [{ delta: {} }],
usage: {
prompt_tokens: 100,
completion_tokens: 0,
},
}
}),
},
},
}

const handler = new OpenAiHandler({
openAiApiKey: "test-key",
openAiModelId: "glm-4.6",
openAiStreamingEnabled: true,
})

// Replace the client with our mock
;(handler as any).client = mockClient

const chunks = []
const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have a fallback text response
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toContain("trouble generating a response")

// Should still have usage metrics
const usageChunks = chunks.filter((c) => c.type === "usage")
expect(usageChunks).toHaveLength(1)
})

it("should not provide fallback for non-GLM models with empty streams", async () => {
const mockClient = {
chat: {
completions: {
create: vi.fn().mockImplementation(async function* () {
// Simulate empty stream - only usage, no content
yield {
choices: [{ delta: {} }],
usage: {
prompt_tokens: 100,
completion_tokens: 0,
},
}
}),
},
},
}

const handler = new OpenAiHandler({
openAiApiKey: "test-key",
openAiModelId: "gpt-4",
openAiStreamingEnabled: true,
})

// Replace the client with our mock
;(handler as any).client = mockClient

const chunks = []
const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Should NOT have a fallback text response for non-GLM models
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(0)

// Should still have usage metrics
const usageChunks = chunks.filter((c) => c.type === "usage")
expect(usageChunks).toHaveLength(1)
})
})

describe("BaseOpenAiCompatibleProvider", () => {
class TestProvider extends BaseOpenAiCompatibleProvider<"glm-4.6" | "other-model"> {
constructor(modelId: "glm-4.6" | "other-model") {
super({
providerName: "Test",
baseURL: "https://test.com",
apiKey: "test-key",
defaultProviderModelId: modelId,
providerModels: {
"glm-4.6": {
maxTokens: 4096,
contextWindow: 8192,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
},
"other-model": {
maxTokens: 4096,
contextWindow: 8192,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
},
},
apiModelId: modelId,
})
}
}

it("should provide fallback response for GLM models with empty streams", async () => {
const provider = new TestProvider("glm-4.6")

// Mock the client
const mockClient = {
chat: {
completions: {
create: vi.fn().mockImplementation(async function* () {
// Simulate empty stream
yield {
choices: [{ delta: {} }],
usage: {
prompt_tokens: 100,
completion_tokens: 0,
},
}
}),
},
},
}
;(provider as any).client = mockClient

const chunks = []
const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Should have a fallback text response
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(1)
expect(textChunks[0].text).toContain("trouble generating a response")
})

it("should not provide fallback for non-GLM models", async () => {
const provider = new TestProvider("other-model")

// Mock the client
const mockClient = {
chat: {
completions: {
create: vi.fn().mockImplementation(async function* () {
// Simulate empty stream
yield {
choices: [{ delta: {} }],
usage: {
prompt_tokens: 100,
completion_tokens: 0,
},
}
}),
},
},
}
;(provider as any).client = mockClient

const chunks = []
const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

for await (const chunk of stream) {
chunks.push(chunk)
}

// Should NOT have a fallback text response
const textChunks = chunks.filter((c) => c.type === "text")
expect(textChunks).toHaveLength(0)
})
})
})
15 changes: 15 additions & 0 deletions src/api/providers/base-openai-compatible-provider.ts
@@ -98,11 +98,14 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const stream = await this.createStream(systemPrompt, messages, metadata)
let hasContent = false
const modelId = this.getModel().id

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta

if (delta?.content) {
hasContent = true
yield {
type: "text",
text: delta.content,
@@ -117,6 +120,18 @@
}
}
}

// For GLM models that may return empty streams, provide a fallback response
if (
!hasContent &&
modelId &&
(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
) {
yield {
type: "text",
text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
}
}
}

async completePrompt(prompt: string): Promise<string> {
16 changes: 16 additions & 0 deletions src/api/providers/openai.ts
@@ -189,17 +189,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
)

let lastUsage
let hasContent = false

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta ?? {}

if (delta.content) {
hasContent = true
for (const chunk of matcher.update(delta.content)) {
yield chunk
}
}

if ("reasoning_content" in delta && delta.reasoning_content) {
hasContent = true
yield {
type: "reasoning",
text: (delta.reasoning_content as string | undefined) || "",
@@ -211,9 +214,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}

for (const chunk of matcher.final()) {
hasContent = true
yield chunk
}

// For GLM models that may return empty streams, provide a fallback response
if (
Review comment (Author):

[P2] Duplicate GLM detection logic. The same modelId.includes('glm'|'chatglm') condition appears here, in BaseOpenAiCompatibleProvider, and in Task.ts. Consider extracting a small helper (e.g., isGlmModel(modelId)) to keep behavior consistent and reduce drift.
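
One possible shape for such a helper — a minimal sketch only; the function name and module location are assumptions, not part of this PR:

// Hypothetical shared helper (module path is an assumption). Centralizes the
// GLM detection currently duplicated in openai.ts, BaseOpenAiCompatibleProvider,
// and Task.ts.
export function isGlmModel(modelId: string | undefined | null): boolean {
	if (!modelId) {
		return false
	}
	const id = modelId.toLowerCase()
	// "chatglm" already contains "glm", so the second check is redundant;
	// it is kept only to mirror the existing condition.
	return id.includes("glm") || id.includes("chatglm")
}

Call sites would then reduce to checks like if (!hasContent && isGlmModel(modelId)) before yielding the fallback text.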

!hasContent &&
modelId &&
(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
) {
yield {
type: "text",
text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
Review comment:

The GLM fallback response here is hardcoded. To ensure consistency and proper localization, consider wrapping this error message in a translation function.

Suggested change:
- text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
+ text: t("I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps."),

This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.
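
If the project routes strings through i18next (as the t() suggestion implies), the message would normally live under a key rather than being passed verbatim. A minimal, self-contained sketch — the key name and inlined resources are illustrative only:

import i18next from "i18next"

// Resources are inlined here purely for illustration; the real string would
// live in the project's locale files under whatever key is chosen.
i18next
	.init({
		lng: "en",
		resources: {
			en: {
				translation: {
					errors: {
						glmEmptyStreamFallback:
							"I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",
					},
				},
			},
		},
	})
	.then(() => {
		const fallbackText = i18next.t("errors.glmEmptyStreamFallback")
		// The provider would then yield { type: "text", text: fallbackText }
		// instead of the hardcoded literal.
		console.log(fallbackText)
	})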

}
}

if (lastUsage) {
yield this.processUsageMetrics(lastUsage, modelInfo)
}
58 changes: 48 additions & 10 deletions src/core/task/Task.ts
@@ -2339,17 +2339,55 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
continue
} else {
// If there's no assistant_responses, that means we got no text
// or tool_use content blocks from API which we should assume is
// an error.
await this.say(
"error",
"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
)
// or tool_use content blocks from API. This can happen with some models
// like GLM-4.6 that may return empty streams occasionally.
// Instead of treating this as a fatal error, we'll handle it gracefully.
const modelId = this.api.getModel().id
const isGLMModel =
modelId && (modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))

if (isGLMModel) {
Review comment:

In the GLM-specific branch handling empty responses (around lines 2349–2365), fallback messages and prompts are hardcoded. Consider using a translation function (e.g. t('...')) and possibly refactoring the duplicated GLM check into a helper for clarity.

This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.

Review comment (Author):

[P1] Potential infinite retry loop for GLM empty streams. This path pushes a clarification and immediately retries with no max attempts or backoff. If the provider keeps returning empty streams, this can loop indefinitely and spam error messages. Recommend adding a capped retry counter (e.g., 1–3 attempts) with a small delay and logging, and falling back to the standard error after the cap. Also consider adding unit tests that simulate repeated empty responses to verify the retry cap and final behavior.
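
A rough sketch of what a capped retry could look like, pulled out of Task for illustration — the class name, default cap, and delay are assumptions, not part of this PR:

import { describe, it, expect } from "vitest"

// Minimal retry-cap helper for empty GLM streams (a sketch; names and defaults are illustrative).
export class EmptyStreamRetryPolicy {
	private attempts = 0

	constructor(
		private readonly maxAttempts = 2,
		private readonly delayMs = 500,
	) {}

	// Records an attempt and reports whether another retry is allowed,
	// waiting briefly so repeated empty streams don't hammer the provider.
	async shouldRetry(): Promise<boolean> {
		if (this.attempts >= this.maxAttempts) {
			return false
		}
		this.attempts++
		await new Promise((resolve) => setTimeout(resolve, this.delayMs))
		return true
	}

	reset(): void {
		this.attempts = 0
	}
}

// Task would hold one instance and consult it in the empty-response branch, e.g.:
// if (isGLMModel && (await this.emptyStreamRetryPolicy.shouldRetry())) { ...push the clarification and continue... }
// else { ...fall back to the existing "Unexpected API Response" error path... }

describe("EmptyStreamRetryPolicy", () => {
	it("stops retrying after the configured cap", async () => {
		const policy = new EmptyStreamRetryPolicy(2, 0)
		expect(await policy.shouldRetry()).toBe(true)
		expect(await policy.shouldRetry()).toBe(true)
		expect(await policy.shouldRetry()).toBe(false)
	})
})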

// For GLM models, treat empty response as a temporary issue and retry
await this.say(
"error",
"The GLM model returned an empty response. This can happen occasionally with GLM models. Retrying with a clarification request...",
)

await this.addToApiConversationHistory({
role: "assistant",
content: [{ type: "text", text: "Failure: I did not provide a response." }],
})
// Add a minimal assistant response to maintain conversation flow
await this.addToApiConversationHistory({
role: "assistant",
content: [
{
type: "text",
text: "I encountered an issue generating a response. Let me try again.",
},
],
})

// Add a user message prompting the model to respond
this.userMessageContent.push({
type: "text",
text: "Please provide a response to the previous request. If you're having trouble, break down the task into smaller steps.",
})

// Continue the loop to retry with the clarification
stack.push({
userContent: [...this.userMessageContent],
includeFileDetails: false,
})
continue
} else {
// For other models, log a more informative error
await this.say(
"error",
"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output. Consider checking your API configuration or trying a different model.",
)

await this.addToApiConversationHistory({
role: "assistant",
content: [{ type: "text", text: "Failure: I did not provide a response." }],
})
}
}

// If we reach here without continuing, return false (will always be false for now)