Commit 97f9686

Clamp GPT-5 max output tokens to 20% of context window (#8495)
1 parent 8c6587a

2 files changed: +8, -31 lines

src/shared/__tests__/api.spec.ts

Lines changed: 8 additions & 21 deletions
@@ -194,18 +194,17 @@ describe("getModelMaxOutputTokens", () => {
 		expect(result).toBe(20_000) // Should use model.maxTokens since it's exactly at 20%
 	})

-	test("should bypass 20% cap for GPT-5 models and use exact configured max tokens", () => {
+	test("should apply 20% cap for GPT-5 models like other models", () => {
 		const model: ModelInfo = {
 			contextWindow: 200_000,
 			supportsPromptCache: false,
-			maxTokens: 128_000, // 64% of context window, normally would be capped
+			maxTokens: 128_000, // 64% of context window, should be capped
 		}

 		const settings: ProviderSettings = {
 			apiProvider: "openai",
 		}

-		// Test various GPT-5 model IDs
 		const gpt5ModelIds = ["gpt-5", "gpt-5-turbo", "GPT-5", "openai/gpt-5-preview", "gpt-5-32k", "GPT-5-TURBO"]

 		gpt5ModelIds.forEach((modelId) => {
@@ -215,8 +214,8 @@ describe("getModelMaxOutputTokens", () => {
 				settings,
 				format: "openai",
 			})
-			// Should use full 128k tokens, not capped to 20% (40k)
-			expect(result).toBe(128_000)
+			// Should be capped to 20% of context window: 200_000 * 0.2 = 40_000
+			expect(result).toBe(40_000)
 		})
 	})

@@ -246,23 +245,11 @@ describe("getModelMaxOutputTokens", () => {
 		})
 	})

-	test("should handle GPT-5 models with various max token configurations", () => {
+	test("should cap GPT-5 models to min(model.maxTokens, 20% of contextWindow)", () => {
 		const testCases = [
-			{
-				maxTokens: 128_000,
-				contextWindow: 200_000,
-				expected: 128_000, // Uses full 128k
-			},
-			{
-				maxTokens: 64_000,
-				contextWindow: 200_000,
-				expected: 64_000, // Uses configured 64k
-			},
-			{
-				maxTokens: 256_000,
-				contextWindow: 400_000,
-				expected: 256_000, // Uses full 256k even though it's 64% of context
-			},
+			{ maxTokens: 128_000, contextWindow: 200_000, expected: 40_000 },
+			{ maxTokens: 64_000, contextWindow: 200_000, expected: 40_000 },
+			{ maxTokens: 256_000, contextWindow: 400_000, expected: 80_000 },
 		]

 		testCases.forEach(({ maxTokens, contextWindow, expected }) => {
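
The expected values in these tests follow directly from the clamp arithmetic. As a sanity check, here is a minimal standalone sketch (the capMaxTokens helper is hypothetical; it simply mirrors the formula in src/shared/api.ts below):

	// Hypothetical helper mirroring the clamp in src/shared/api.ts.
	const capMaxTokens = (maxTokens: number, contextWindow: number): number =>
		Math.min(maxTokens, Math.ceil(contextWindow * 0.2))

	console.log(capMaxTokens(128_000, 200_000)) // 40_000 (20% of 200_000)
	console.log(capMaxTokens(64_000, 200_000)) // 40_000
	console.log(capMaxTokens(256_000, 400_000)) // 80_000 (20% of 400_000)
	console.log(capMaxTokens(20_000, 100_000)) // 20_000 — at exactly 20%, maxTokens wins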

src/shared/api.ts

Lines changed: 0 additions & 10 deletions
@@ -116,17 +116,7 @@ export const getModelMaxOutputTokens = ({
 	}

 	// If model has explicit maxTokens, clamp it to 20% of the context window
-	// Exception: GPT-5 models should use their exact configured max output tokens
 	if (model.maxTokens) {
-		// Check if this is a GPT-5 model (case-insensitive)
-		const isGpt5Model = modelId.toLowerCase().includes("gpt-5")
-
-		// GPT-5 models bypass the 20% cap and use their full configured max tokens
-		if (isGpt5Model) {
-			return model.maxTokens
-		}
-
-		// All other models are clamped to 20% of context window
 		return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
 	}

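With the special case removed, this branch of getModelMaxOutputTokens applies the same clamp to every model. A minimal sketch of the resulting behavior (simplified: the real function takes additional parameters such as settings and format, and has other branches for models without an explicit maxTokens):

	// Simplified ModelInfo with only the fields this branch reads.
	interface ModelInfo {
		contextWindow: number
		maxTokens?: number
	}

	// If the model declares maxTokens, clamp it to 20% of the context window.
	// GPT-5 model IDs are no longer special-cased.
	function clampedMaxOutputTokens(model: ModelInfo): number | undefined {
		if (model.maxTokens) {
			return Math.min(model.maxTokens, Math.ceil(model.contextWindow * 0.2))
		}
		return undefined // the real function falls through to other defaults
	}

	console.log(clampedMaxOutputTokens({ contextWindow: 200_000, maxTokens: 128_000 })) // 40_000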
