fix: handle empty stream responses from GLM models #8483

@@ -0,0 +1,193 @@
import { describe, it, expect, vi, beforeEach } from "vitest"
import { OpenAiHandler } from "../openai"
import { BaseOpenAiCompatibleProvider } from "../base-openai-compatible-provider"

describe("GLM Empty Stream Handling", () => {
	describe("OpenAiHandler", () => {
		it("should provide fallback response for GLM models with empty streams", async () => {
			const mockClient = {
				chat: {
					completions: {
						create: vi.fn().mockImplementation(async function* () {
							// Simulate empty stream - only usage, no content
							yield {
								choices: [{ delta: {} }],
								usage: {
									prompt_tokens: 100,
									completion_tokens: 0,
								},
							}
						}),
					},
				},
			}

			const handler = new OpenAiHandler({
				openAiApiKey: "test-key",
				openAiModelId: "glm-4.6",
				openAiStreamingEnabled: true,
			})

			// Replace the client with our mock
			;(handler as any).client = mockClient

			const chunks = []
			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Should have a fallback text response
			const textChunks = chunks.filter((c) => c.type === "text")
			expect(textChunks).toHaveLength(1)
			expect(textChunks[0].text).toContain("trouble generating a response")

			// Should still have usage metrics
			const usageChunks = chunks.filter((c) => c.type === "usage")
			expect(usageChunks).toHaveLength(1)
		})

		it("should not provide fallback for non-GLM models with empty streams", async () => {
			const mockClient = {
				chat: {
					completions: {
						create: vi.fn().mockImplementation(async function* () {
							// Simulate empty stream - only usage, no content
							yield {
								choices: [{ delta: {} }],
								usage: {
									prompt_tokens: 100,
									completion_tokens: 0,
								},
							}
						}),
					},
				},
			}

			const handler = new OpenAiHandler({
				openAiApiKey: "test-key",
				openAiModelId: "gpt-4",
				openAiStreamingEnabled: true,
			})

			// Replace the client with our mock
			;(handler as any).client = mockClient

			const chunks = []
			const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Should NOT have a fallback text response for non-GLM models
			const textChunks = chunks.filter((c) => c.type === "text")
			expect(textChunks).toHaveLength(0)

			// Should still have usage metrics
			const usageChunks = chunks.filter((c) => c.type === "usage")
			expect(usageChunks).toHaveLength(1)
		})
	})

	describe("BaseOpenAiCompatibleProvider", () => {
		class TestProvider extends BaseOpenAiCompatibleProvider<"glm-4.6" | "other-model"> {
			constructor(modelId: "glm-4.6" | "other-model") {
				super({
					providerName: "Test",
					baseURL: "https://test.com",
					apiKey: "test-key",
					defaultProviderModelId: modelId,
					providerModels: {
						"glm-4.6": {
							maxTokens: 4096,
							contextWindow: 8192,
							supportsPromptCache: false,
							inputPrice: 0,
							outputPrice: 0,
						},
						"other-model": {
							maxTokens: 4096,
							contextWindow: 8192,
							supportsPromptCache: false,
							inputPrice: 0,
							outputPrice: 0,
						},
					},
					apiModelId: modelId,
				})
			}
		}

		it("should provide fallback response for GLM models with empty streams", async () => {
			const provider = new TestProvider("glm-4.6")

			// Mock the client
			const mockClient = {
				chat: {
					completions: {
						create: vi.fn().mockImplementation(async function* () {
							// Simulate empty stream
							yield {
								choices: [{ delta: {} }],
								usage: {
									prompt_tokens: 100,
									completion_tokens: 0,
								},
							}
						}),
					},
				},
			}
			;(provider as any).client = mockClient

			const chunks = []
			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Should have a fallback text response
			const textChunks = chunks.filter((c) => c.type === "text")
			expect(textChunks).toHaveLength(1)
			expect(textChunks[0].text).toContain("trouble generating a response")
		})

		it("should not provide fallback for non-GLM models", async () => {
			const provider = new TestProvider("other-model")

			// Mock the client
			const mockClient = {
				chat: {
					completions: {
						create: vi.fn().mockImplementation(async function* () {
							// Simulate empty stream
							yield {
								choices: [{ delta: {} }],
								usage: {
									prompt_tokens: 100,
									completion_tokens: 0,
								},
							}
						}),
					},
				},
			}
			;(provider as any).client = mockClient

			const chunks = []
			const stream = provider.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])

			for await (const chunk of stream) {
				chunks.push(chunk)
			}

			// Should NOT have a fallback text response
			const textChunks = chunks.filter((c) => c.type === "text")
			expect(textChunks).toHaveLength(0)
		})
	})
})

@@ -189,17 +189,20 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
)

let lastUsage
let hasContent = false

for await (const chunk of stream) {
	const delta = chunk.choices[0]?.delta ?? {}

	if (delta.content) {
		hasContent = true
		for (const chunk of matcher.update(delta.content)) {
			yield chunk
		}
	}

	if ("reasoning_content" in delta && delta.reasoning_content) {
		hasContent = true
		yield {
			type: "reasoning",
			text: (delta.reasoning_content as string | undefined) || "",

@@ -211,9 +214,22 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
}

for (const chunk of matcher.final()) {
	hasContent = true
	yield chunk
}

// For GLM models that may return empty streams, provide a fallback response
if (

Reviewer comment: [P2] Duplicate GLM detection logic. The same modelId.includes('glm'|'chatglm') condition appears here, in BaseOpenAiCompatibleProvider, and in Task.ts. Consider extracting a small helper (e.g., isGlmModel(modelId)) to keep behavior consistent and reduce drift.
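A minimal sketch of the helper this comment proposes; the function name and placement are the reviewer's example, not code that exists in the PR, and it simply mirrors the substring check used at the three call sites:

// Hypothetical helper mirroring the PR's detection logic; the name and the
// module it lives in are illustrative. The "chatglm" check is technically
// subsumed by "glm", but it is kept here to match the PR's condition exactly.
export function isGlmModel(modelId: string | undefined): boolean {
	if (!modelId) {
		return false
	}
	const id = modelId.toLowerCase()
	return id.includes("glm") || id.includes("chatglm")
}

The condition below (and the equivalent checks in BaseOpenAiCompatibleProvider and Task.ts) could then collapse to if (!hasContent && isGlmModel(modelId)).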
	!hasContent &&
	modelId &&
	(modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))
) {
	yield {
		type: "text",
		text: "I'm having trouble generating a response. Please try rephrasing your request or breaking it down into smaller steps.",

Reviewer comment: The GLM fallback response here is hardcoded. To ensure consistency and proper localization, consider wrapping this error message in a translation function. (This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.)
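A rough illustration of the localization suggestion, assuming the project exposes an i18next-style t() helper; the import path and the translation key below are assumptions, not identifiers taken from this repository:

// Hypothetical sketch only: both the import path and the key are placeholders.
import { t } from "../../i18n"

// Centralizing the user-facing fallback text lets every locale translate it.
export function glmEmptyStreamFallbackText(): string {
	return t("common:errors.glmEmptyStream")
}

The yield above would then pass text: glmEmptyStreamFallbackText() instead of the hardcoded literal.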
	}
}

if (lastUsage) {
	yield this.processUsageMetrics(lastUsage, modelInfo)
}

@@ -2339,17 +2339,55 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
	continue
} else {
	// If there's no assistant_responses, that means we got no text
	// or tool_use content blocks from API which we should assume is
	// an error.
	await this.say(
		"error",
		"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output.",
	)
	// or tool_use content blocks from API. This can happen with some models
	// like GLM-4.6 that may return empty streams occasionally.
	// Instead of treating this as a fatal error, we'll handle it gracefully.
	const modelId = this.api.getModel().id
	const isGLMModel =
		modelId && (modelId.toLowerCase().includes("glm") || modelId.toLowerCase().includes("chatglm"))

	if (isGLMModel) {

Reviewer comment: In the GLM-specific branch handling empty responses (around lines 2349–2365), fallback messages and prompts are hardcoded. Consider using a translation function (e.g. t('...')) and possibly refactoring the duplicated GLM check into a helper for clarity. (This comment was generated because it violated a code review rule: irule_C0ez7Rji6ANcGkkX.)

Reviewer comment: [P1] Potential infinite retry loop for GLM empty streams. This path pushes a clarification and immediately retries with no max attempts or backoff. If the provider keeps returning empty streams, this can loop indefinitely and spam error messages. Recommend adding a capped retry counter (e.g., 1–3 attempts) with a small delay and logging, and falling back to the standard error after the cap. Also consider adding unit tests that simulate repeated empty responses to verify the retry cap and final behavior.
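A self-contained sketch of the capped-retry idea from the [P1] comment; the class, the default cap of two attempts, and the linear backoff are illustrative choices, not code from this PR:

// Illustrative guard: allows a bounded number of retries for empty GLM
// responses, with a small linear backoff, before giving up.
export class EmptyStreamRetryGuard {
	private attempts = 0

	constructor(private readonly maxAttempts: number = 2) {}

	// Records an attempt and reports whether another retry is still allowed.
	shouldRetry(): boolean {
		if (this.attempts >= this.maxAttempts) {
			return false
		}
		this.attempts++
		return true
	}

	// Waits a little longer after each failed attempt (1s, 2s, ...).
	async backoff(): Promise<void> {
		await new Promise((resolve) => setTimeout(resolve, 1000 * this.attempts))
	}

	// Clears the counter once the model produces real content again.
	reset(): void {
		this.attempts = 0
	}
}

The GLM branch below could then call shouldRetry() before pushing the clarification, and fall through to the standard "Unexpected API Response" path once the cap is reached.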
		// For GLM models, treat empty response as a temporary issue and retry
		await this.say(
			"error",
			"The GLM model returned an empty response. This can happen occasionally with GLM models. Retrying with a clarification request...",
		)

		await this.addToApiConversationHistory({
			role: "assistant",
			content: [{ type: "text", text: "Failure: I did not provide a response." }],
		})
		// Add a minimal assistant response to maintain conversation flow
		await this.addToApiConversationHistory({
			role: "assistant",
			content: [
				{
					type: "text",
					text: "I encountered an issue generating a response. Let me try again.",
				},
			],
		})

		// Add a user message prompting the model to respond
		this.userMessageContent.push({
			type: "text",
			text: "Please provide a response to the previous request. If you're having trouble, break down the task into smaller steps.",
		})

		// Continue the loop to retry with the clarification
		stack.push({
			userContent: [...this.userMessageContent],
			includeFileDetails: false,
		})
		continue
	} else {
		// For other models, log a more informative error
		await this.say(
			"error",
			"Unexpected API Response: The language model did not provide any assistant messages. This may indicate an issue with the API or the model's output. Consider checking your API configuration or trying a different model.",
		)

		await this.addToApiConversationHistory({
			role: "assistant",
			content: [{ type: "text", text: "Failure: I did not provide a response." }],
		})
	}
}

// If we reach here without continuing, return false (will always be false for now)
Reviewer comment: [P3] Unused import 'beforeEach' may trip stricter lint settings; suggest removing it.
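Applying the suggestion would reduce the test file's first import to:

import { describe, it, expect, vi } from "vitest"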