-
Notifications
You must be signed in to change notification settings - Fork 2.3k
feat: Add DeepSeek V3.1 variants and GLM-4.6 with reasoning support (… #8479
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk" | |||||
import OpenAI from "openai" | ||||||
|
||||||
import type { ApiHandlerOptions } from "../../shared/api" | ||||||
import { shouldUseReasoningEffort } from "../../shared/api" | ||||||
import { XmlMatcher } from "../../utils/xml-matcher" | ||||||
import { convertToR1Format } from "../transform/r1-format" | ||||||
import { convertToOpenAiMessages } from "../transform/openai-format" | ||||||
|
@@ -26,6 +27,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> { | |||||
private getCompletionParams( | ||||||
systemPrompt: string, | ||||||
messages: Anthropic.Messages.MessageParam[], | ||||||
enableReasoning: boolean = false, | ||||||
): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming { | ||||||
const { | ||||||
id: model, | ||||||
|
@@ -34,19 +36,29 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> { | |||||
|
||||||
const temperature = this.options.modelTemperature ?? this.getModel().info.temperature | ||||||
|
||||||
return { | ||||||
const params: any = { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P3] Typing: Avoid any here; you can return the exact type to improve maintainability and catch mistakes earlier.
Suggested change
|
||||||
model, | ||||||
max_tokens, | ||||||
temperature, | ||||||
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], | ||||||
stream: true, | ||||||
stream_options: { include_usage: true }, | ||||||
} | ||||||
|
||||||
// Add reasoning support for DeepSeek V3.1, GLM-4.5, and GLM-4.6 models | ||||||
if (enableReasoning) { | ||||||
params.chat_template_kwargs = { | ||||||
thinking: true, | ||||||
} | ||||||
} | ||||||
|
||||||
return params | ||||||
} | ||||||
|
||||||
override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream { | ||||||
const model = this.getModel() | ||||||
|
||||||
// Handle DeepSeek R1 models with XML tag parsing | ||||||
if (model.id.includes("DeepSeek-R1")) { | ||||||
const stream = await this.client.chat.completions.create({ | ||||||
...this.getCompletionParams(systemPrompt, messages), | ||||||
|
@@ -84,7 +96,48 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> { | |||||
for (const processedChunk of matcher.final()) { | ||||||
yield processedChunk | ||||||
} | ||||||
return | ||||||
} | ||||||
|
||||||
// Handle DeepSeek V3.1, GLM-4.5, and GLM-4.6 models with reasoning_content parsing | ||||||
const isHybridReasoningModel = | ||||||
model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") || model.id.includes("GLM-4.6") | ||||||
const reasoningEnabled = this.options.enableReasoningEffort === true | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [P2] Consistency with reasoning toggle: This direct check (=== true) bypasses the shared helper and may diverge from global defaults or future logic. Prefer using the existing shouldUseReasoningEffort helper so provider behavior stays consistent across backends. Also remove the unused import if you decide to keep the direct check. |
||||||
|
||||||
if (isHybridReasoningModel && reasoningEnabled) { | ||||||
const stream = await this.client.chat.completions.create( | ||||||
this.getCompletionParams(systemPrompt, messages, true), | ||||||
) | ||||||
|
||||||
for await (const chunk of stream) { | ||||||
const delta = chunk.choices[0]?.delta | ||||||
|
||||||
// Handle reasoning content from the response | ||||||
if ((delta as any)?.reasoning_content) { | ||||||
yield { | ||||||
type: "reasoning", | ||||||
text: (delta as any).reasoning_content, | ||||||
} | ||||||
} | ||||||
|
||||||
// Handle regular text content | ||||||
if (delta?.content) { | ||||||
yield { | ||||||
type: "text", | ||||||
text: delta.content, | ||||||
} | ||||||
} | ||||||
|
||||||
if (chunk.usage) { | ||||||
yield { | ||||||
type: "usage", | ||||||
inputTokens: chunk.usage.prompt_tokens || 0, | ||||||
outputTokens: chunk.usage.completion_tokens || 0, | ||||||
} | ||||||
} | ||||||
} | ||||||
} else { | ||||||
// For non-reasoning models or when reasoning is disabled, use the base implementation | ||||||
yield* super.createMessage(systemPrompt, messages) | ||||||
} | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[P2] Potential breaking change: Renaming the model id from "DeepSeek-V3.1-Turbo" to "DeepSeek-V3.1-turbo" will break users who have existing configs referencing the old id. Consider adding a temporary alias/back-compat mapping (accept both ids) or a migration to remap the old value to the new one before lookup to avoid surprising failures.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The correct Chutes model id for this model is "deepseek-ai/DeepSeek-V3.1-turbo".
