37 changes: 37 additions & 0 deletions packages/types/src/providers/chutes.ts
@@ -6,6 +6,8 @@ export type ChutesModelId =
| "deepseek-ai/DeepSeek-R1"
| "deepseek-ai/DeepSeek-V3"
| "deepseek-ai/DeepSeek-V3.1"
| "deepseek-ai/DeepSeek-V3.1-Terminus"
| "deepseek-ai/DeepSeek-V3.1-turbo"

[P2] Potential breaking change: Renaming the model id from "DeepSeek-V3.1-Turbo" to "DeepSeek-V3.1-turbo" will break users who have existing configs referencing the old id. Consider adding a temporary alias/back-compat mapping (accept both ids) or a migration to remap the old value to the new one before lookup to avoid surprising failures.
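A minimal sketch of the alias approach this comment suggests (the `LEGACY_CHUTES_MODEL_ALIASES` table and `resolveChutesModelId` helper are illustrative names, not part of this PR):

// Hypothetical back-compat mapping: accept the old id and remap it to the
// new one before the model lookup, so existing configs keep working.
const LEGACY_CHUTES_MODEL_ALIASES: Record<string, ChutesModelId> = {
    "deepseek-ai/DeepSeek-V3.1-Turbo": "deepseek-ai/DeepSeek-V3.1-turbo",
}

function resolveChutesModelId(rawId: string): ChutesModelId {
    // Fall back to the configured id when no alias applies.
    return (LEGACY_CHUTES_MODEL_ALIASES[rawId] ?? rawId) as ChutesModelId
}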

Author:

The correct model id on Chutes for this model is "deepseek-ai/DeepSeek-V3.1-turbo".

| "unsloth/Llama-3.3-70B-Instruct"
| "chutesai/Llama-4-Scout-17B-16E-Instruct"
| "unsloth/Mistral-Nemo-Instruct-2407"
@@ -29,6 +31,7 @@ export type ChutesModelId =
| "tngtech/DeepSeek-R1T-Chimera"
| "zai-org/GLM-4.5-Air"
| "zai-org/GLM-4.5-FP8"
| "zai-org/GLM-4.6-FP8"
| "moonshotai/Kimi-K2-Instruct-75k"
| "moonshotai/Kimi-K2-Instruct-0905"
| "Qwen/Qwen3-235B-A22B-Thinking-2507"
@@ -70,10 +73,31 @@ export const chutesModels = {
contextWindow: 163840,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description: "DeepSeek V3.1 model.",
},
"deepseek-ai/DeepSeek-V3.1-Terminus": {
maxTokens: 32768,
contextWindow: 163840,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
},
"deepseek-ai/DeepSeek-V3.1-turbo": {
maxTokens: 32768,
contextWindow: 163840,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
},
"unsloth/Llama-3.3-70B-Instruct": {
maxTokens: 32768, // From Groq
contextWindow: 131072, // From Groq
@@ -259,6 +283,7 @@ export const chutesModels = {
contextWindow: 151329,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description:
@@ -269,11 +294,23 @@ export const chutesModels = {
contextWindow: 131072,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description:
"GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture.",
},
"zai-org/GLM-4.6-FP8": {
maxTokens: 32768,
contextWindow: 204800,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description:
"GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.",
},
"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
maxTokens: 32768,
contextWindow: 262144,
203 changes: 203 additions & 0 deletions src/api/providers/__tests__/chutes.spec.ts
@@ -253,6 +253,30 @@ describe("ChutesHandler", () => {
)
})

it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => {
const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8"
const handlerWithModel = new ChutesHandler({
apiModelId: testModelId,
chutesApiKey: "test-chutes-api-key",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(
expect.objectContaining({
maxTokens: 32768,
contextWindow: 204800,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description:
"GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.",
temperature: 0.5, // Default temperature for non-DeepSeek models
}),
)
})

it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => {
const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
const handlerWithModel = new ChutesHandler({
@@ -297,6 +321,52 @@ describe("ChutesHandler", () => {
)
})

it("should return DeepSeek V3.1 Terminus model with correct configuration", () => {
const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Terminus"
const handlerWithModel = new ChutesHandler({
apiModelId: testModelId,
chutesApiKey: "test-chutes-api-key",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(
expect.objectContaining({
maxTokens: 32768,
contextWindow: 163840,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
temperature: 0.5, // Default temperature for non-R1 DeepSeek models
}),
)
})

it("should return DeepSeek V3.1 turbo model with correct configuration", () => {
const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-turbo"
const handlerWithModel = new ChutesHandler({
apiModelId: testModelId,
chutesApiKey: "test-chutes-api-key",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(
expect.objectContaining({
maxTokens: 32768,
contextWindow: 163840,
supportsImages: false,
supportsPromptCache: false,
supportsReasoningEffort: true,
inputPrice: 0,
outputPrice: 0,
description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
temperature: 0.5, // Default temperature for non-R1 DeepSeek models
}),
)
})

it("should return moonshotai/Kimi-K2-Instruct-0905 model with correct configuration", () => {
const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-0905"
const handlerWithModel = new ChutesHandler({
@@ -470,4 +540,137 @@ describe("ChutesHandler", () => {
const model = handlerWithModel.getModel()
expect(model.info.temperature).toBe(0.5)
})

it("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => {
const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
const handlerWithModel = new ChutesHandler({
apiModelId: modelId,
chutesApiKey: "test-chutes-api-key",
enableReasoningEffort: true,
})

mockCreate.mockImplementationOnce(async () => ({
[Symbol.asyncIterator]: async function* () {
// First yield reasoning content
yield {
choices: [{ delta: { reasoning_content: "Let me think about this..." } }],
}
// Then yield regular content
yield {
choices: [{ delta: { content: "Here's my response." } }],
}
// Finally yield usage
yield {
choices: [],
usage: { prompt_tokens: 100, completion_tokens: 50 },
}
},
}))

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

const stream = handlerWithModel.createMessage(systemPrompt, messages)
const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should parse reasoning content and regular content separately
expect(chunks).toContainEqual({ type: "reasoning", text: "Let me think about this..." })
expect(chunks).toContainEqual({ type: "text", text: "Here's my response." })
expect(chunks).toContainEqual({ type: "usage", inputTokens: 100, outputTokens: 50 })

// Verify that the API was called with reasoning enabled
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
chat_template_kwargs: {
thinking: true,
},
}),
)
})

it("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => {
const modelId: ChutesModelId = "zai-org/GLM-4.5-Air"
const handlerWithModel = new ChutesHandler({
apiModelId: modelId,
chutesApiKey: "test-chutes-api-key",
enableReasoningEffort: true,
})

mockCreate.mockImplementationOnce(async () => ({
[Symbol.asyncIterator]: async function* () {
// First yield reasoning content
yield {
choices: [{ delta: { reasoning_content: "GLM reasoning process..." } }],
}
// Then yield regular content
yield {
choices: [{ delta: { content: "GLM response" } }],
}
// Finally yield usage
yield {
choices: [],
usage: { prompt_tokens: 100, completion_tokens: 50 },
}
},
}))

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

const stream = handlerWithModel.createMessage(systemPrompt, messages)
const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should parse reasoning content separately
expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning process..." })
expect(chunks).toContainEqual({ type: "text", text: "GLM response" })

// Verify that the API was called with reasoning enabled
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
chat_template_kwargs: {
thinking: true,
},
}),
)
})

it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => {
const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
const handlerWithModel = new ChutesHandler({
apiModelId: modelId,
chutesApiKey: "test-chutes-api-key",
enableReasoningEffort: false,
})

mockCreate.mockImplementationOnce(async () => ({
[Symbol.asyncIterator]: async function* () {
yield {
choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
}
yield {
choices: [],
usage: { prompt_tokens: 100, completion_tokens: 50 },
}
},
}))

const systemPrompt = "You are a helpful assistant"
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]

const stream = handlerWithModel.createMessage(systemPrompt, messages)
const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}

// Should NOT parse reasoning content when disabled
expect(chunks).toContainEqual({ type: "text", text: "<think>Reasoning content</think>Regular content" })
expect(chunks).not.toContainEqual({ type: "reasoning", text: "Reasoning content" })
})
})
55 changes: 54 additions & 1 deletion src/api/providers/chutes.ts
@@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import OpenAI from "openai"

import type { ApiHandlerOptions } from "../../shared/api"
import { shouldUseReasoningEffort } from "../../shared/api"
import { XmlMatcher } from "../../utils/xml-matcher"
import { convertToR1Format } from "../transform/r1-format"
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -26,6 +27,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
private getCompletionParams(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
enableReasoning: boolean = false,
): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
const {
id: model,
@@ -34,19 +36,29 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {

const temperature = this.options.modelTemperature ?? this.getModel().info.temperature

return {
const params: any = {

[P3] Typing: Avoid any here; you can return the exact type to improve maintainability and catch mistakes earlier.

Suggested change (replace the first line with the second):
const params: any = {
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {

model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
stream: true,
stream_options: { include_usage: true },
}

// Add reasoning support for DeepSeek V3.1, GLM-4.5, and GLM-4.6 models
if (enableReasoning) {
params.chat_template_kwargs = {
thinking: true,
}
}

return params
}

override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
const model = this.getModel()

// Handle DeepSeek R1 models with XML tag parsing
if (model.id.includes("DeepSeek-R1")) {
const stream = await this.client.chat.completions.create({
...this.getCompletionParams(systemPrompt, messages),
@@ -84,7 +96,48 @@
for (const processedChunk of matcher.final()) {
yield processedChunk
}
return
}

// Handle DeepSeek V3.1, GLM-4.5, and GLM-4.6 models with reasoning_content parsing
const isHybridReasoningModel =
model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") || model.id.includes("GLM-4.6")
const reasoningEnabled = this.options.enableReasoningEffort === true

[P2] Consistency with reasoning toggle: This direct check (=== true) bypasses the shared helper and may diverge from global defaults or future logic. Prefer using the existing shouldUseReasoningEffort helper so provider behavior stays consistent across backends. Also remove the unused import if you decide to keep the direct check.
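A sketch of what routing the decision through the shared helper could look like (the call shape here is a guess; the helper's actual signature is not shown in this diff):

// Hypothetical call shape — centralizes the reasoning on/off decision
// instead of reading the flag inline with `=== true`.
const reasoningEnabled = shouldUseReasoningEffort({
    model: model.info,
    settings: this.options,
})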


if (isHybridReasoningModel && reasoningEnabled) {
const stream = await this.client.chat.completions.create(
this.getCompletionParams(systemPrompt, messages, true),
)

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta

// Handle reasoning content from the response
if ((delta as any)?.reasoning_content) {
yield {
type: "reasoning",
text: (delta as any).reasoning_content,
}
}

// Handle regular text content
if (delta?.content) {
yield {
type: "text",
text: delta.content,
}
}

if (chunk.usage) {
yield {
type: "usage",
inputTokens: chunk.usage.prompt_tokens || 0,
outputTokens: chunk.usage.completion_tokens || 0,
}
}
}
} else {
// For non-reasoning models or when reasoning is disabled, use the base implementation
yield* super.createMessage(systemPrompt, messages)
}
}