Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,16 @@ Here is an example `.claude/settings.json` file:
"ANTHROPIC_BASE_URL": "http://localhost:4141",
"ANTHROPIC_AUTH_TOKEN": "dummy",
"ANTHROPIC_MODEL": "gpt-4.1",
"ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1"
"ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-4.1",
"ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1",
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can probably be removed here (and from the corresponding start.ts): Anthropic has deprecated it in favor of ANTHROPIC_DEFAULT_HAIKU_MODEL, which you have now also included.
I think this can also close #89, since the original question and the follow-up comment are both addressed by this PR.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ANTHROPIC_SMALL_FAST_MODEL is kept for compatibility with older versions.

"ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-4.1",
"DISABLE_NON_ESSENTIAL_MODEL_CALLS": "1",
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
},
"permissions": {
"deny": [
"WebSearch"
]
}
}
```
Expand Down
360 changes: 334 additions & 26 deletions src/lib/tokenizer.ts
Original file line number Diff line number Diff line change
@@ -1,37 +1,345 @@
import { countTokens } from "gpt-tokenizer/model/gpt-4o"
import type {
ChatCompletionsPayload,
ContentPart,
Message,
Tool,
ToolCall,
} from "~/services/copilot/create-chat-completions"
import type { Model } from "~/services/copilot/get-models"

import type { Message } from "~/services/copilot/create-chat-completions"
// Encoder type mapping
const ENCODING_MAP = {
o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
r50k_base: () => import("gpt-tokenizer/encoding/r50k_base"),
} as const

export const getTokenCount = (messages: Array<Message>) => {
const simplifiedMessages = messages.map((message) => {
let content = ""
if (typeof message.content === "string") {
content = message.content
} else if (Array.isArray(message.content)) {
content = message.content
.filter((part) => part.type === "text")
.map((part) => (part as { text: string }).text)
.join("")
type SupportedEncoding = keyof typeof ENCODING_MAP

// Define encoder interface
interface Encoder {
encode: (text: string) => Array<number>
}

// Cache loaded encoders to avoid repeated imports
const encodingCache = new Map<string, Encoder>()

/**
 * Sum the token cost of a message's tool calls.
 *
 * Each call contributes the model-specific function-init overhead plus the
 * encoded length of its JSON serialization; a single function-end overhead
 * is added once at the close.
 */
const calculateToolCallsTokens = (
  toolCalls: Array<ToolCall>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  const costOf = (call: ToolCall): number =>
    constants.funcInit + encoder.encode(JSON.stringify(call)).length
  return (
    toolCalls.reduce((sum, call) => sum + costOf(call), 0) + constants.funcEnd
  )
}

/**
 * Token cost of a multi-part message content array.
 *
 * Image parts cost their URL's encoded length plus a flat 85-token overhead;
 * text parts cost their encoded length; anything else contributes nothing.
 */
const calculateContentPartsTokens = (
  contentParts: Array<ContentPart>,
  encoder: Encoder,
): number =>
  contentParts.reduce((total, part) => {
    if (part.type === "image_url") {
      // Flat 85-token surcharge per image, matching the original heuristic.
      return total + encoder.encode(part.image_url.url).length + 85
    }
    return part.text ? total + encoder.encode(part.text).length : total
  }, 0)

/**
 * Token cost of a single chat message.
 *
 * A fixed 3-token framing overhead, plus the encoded length of every string
 * field, plus one extra token when a `name` field is present, plus the cost
 * of any tool calls and any multi-part content array.
 */
const calculateMessageTokens = (
  message: Message,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  // Every message is framed with a fixed 3-token overhead.
  let total = 3
  for (const [field, fieldValue] of Object.entries(message)) {
    if (typeof fieldValue === "string") {
      total += encoder.encode(fieldValue).length
    }
    switch (field) {
      case "name": {
        // A name field costs one extra token.
        total += 1
        break
      }
      case "tool_calls": {
        total += calculateToolCallsTokens(
          fieldValue as Array<ToolCall>,
          encoder,
          constants,
        )
        break
      }
      case "content": {
        if (Array.isArray(fieldValue)) {
          total += calculateContentPartsTokens(
            fieldValue as Array<ContentPart>,
            encoder,
          )
        }
        break
      }
      default: {
        break
      }
    }
  }
  return total
}

/**
 * Total token count for a list of messages using the chat-format heuristic:
 * the sum of per-message costs plus a 3-token priming overhead for the
 * assistant reply. An empty list costs nothing.
 */
const calculateTokens = (
  messages: Array<Message>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  if (messages.length === 0) {
    return 0
  }
  const messageTotal = messages.reduce(
    (sum, message) => sum + calculateMessageTokens(message, encoder, constants),
    0,
  )
  // Every reply is primed with <|start|>assistant<|message|>.
  return messageTotal + 3
}

/**
* Get the corresponding encoder module based on encoding type
*/
const getEncodeChatFunction = async (encoding: string): Promise<Encoder> => {
if (encodingCache.has(encoding)) {
const cached = encodingCache.get(encoding)
if (cached) {
return cached
}
return { ...message, content }
})
}

const supportedEncoding = encoding as SupportedEncoding
if (!(supportedEncoding in ENCODING_MAP)) {
const fallbackModule = (await ENCODING_MAP.o200k_base()) as Encoder
encodingCache.set(encoding, fallbackModule)
return fallbackModule
}

const encodingModule = (await ENCODING_MAP[supportedEncoding]()) as Encoder
encodingCache.set(encoding, encodingModule)
return encodingModule
}

/**
 * Read the tokenizer name advertised by a model's capabilities, defaulting
 * to "o200k_base" when none is set.
 */
export const getTokenizerFromModel = (model: Model): string =>
  model.capabilities.tokenizer || "o200k_base"

/**
 * Model-specific overhead constants for tool/function token accounting.
 *
 * Legacy models (gpt-3.5-turbo and gpt-4) carry a larger function-init
 * overhead than newer models; the remaining constants are shared.
 */
const getModelConstants = (model: Model) => {
  const isLegacyModel = model.id === "gpt-3.5-turbo" || model.id === "gpt-4"
  return {
    funcInit: isLegacyModel ? 10 : 7,
    propInit: 3,
    propKey: 3,
    enumInit: -3,
    enumItem: 3,
    funcEnd: 12,
  }
}

let inputMessages = simplifiedMessages.filter((message) => {
return message.role !== "tool"
})
let outputMessages: typeof simplifiedMessages = []
/**
* Calculate tokens for a single parameter
*/
const calculateParameterTokens = (
key: string,
prop: unknown,
context: {
encoder: Encoder
constants: ReturnType<typeof getModelConstants>
},
): number => {
const { encoder, constants } = context
let tokens = constants.propKey

const lastMessage = simplifiedMessages.at(-1)
// Early return if prop is not an object
if (typeof prop !== "object" || prop === null) {
return tokens
}

if (lastMessage?.role === "assistant") {
inputMessages = simplifiedMessages.slice(0, -1)
outputMessages = [lastMessage]
// Type assertion for parameter properties
const param = prop as {
type?: string
description?: string
enum?: Array<unknown>
[key: string]: unknown
}

// @ts-expect-error TS can't infer from arr.filter()
const inputTokens = countTokens(inputMessages)
// @ts-expect-error TS can't infer from arr.filter()
const outputTokens = countTokens(outputMessages)
const paramName = key
const paramType = param.type || "string"
let paramDesc = param.description || ""

// Handle enum values
if (param.enum && Array.isArray(param.enum)) {
tokens += constants.enumInit
for (const item of param.enum) {
tokens += constants.enumItem
tokens += encoder.encode(String(item)).length
}
}

// Clean up description
if (paramDesc.endsWith(".")) {
paramDesc = paramDesc.slice(0, -1)
}

// Encode the main parameter line
const line = `${paramName}:${paramType}:${paramDesc}`
tokens += encoder.encode(line).length

// Handle additional properties (excluding standard ones)
const excludedKeys = new Set(["type", "description", "enum"])
for (const propertyName of Object.keys(param)) {
if (!excludedKeys.has(propertyName)) {
const propertyValue = param[propertyName]
const propertyText =
typeof propertyValue === "string" ? propertyValue : (
JSON.stringify(propertyValue)
)
tokens += encoder.encode(`${propertyName}:${propertyText}`).length
}
}

return tokens
}

/**
 * Token cost of a tool's JSON-schema `parameters` object.
 *
 * The `properties` map contributes a one-time init overhead plus a
 * per-property cost; every other schema key is encoded as a "key:value"
 * pair. Non-object inputs cost nothing.
 */
const calculateParametersTokens = (
  parameters: unknown,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  if (!parameters || typeof parameters !== "object") {
    return 0
  }

  let total = 0
  const entries = Object.entries(parameters as Record<string, unknown>)
  for (const [key, value] of entries) {
    if (key !== "properties") {
      const serialized =
        typeof value === "string" ? value : JSON.stringify(value)
      total += encoder.encode(`${key}:${serialized}`).length
      continue
    }
    const properties = value as Record<string, unknown>
    const propertyNames = Object.keys(properties)
    if (propertyNames.length === 0) {
      continue
    }
    // Non-empty properties map: init overhead plus one cost per property.
    total += constants.propInit
    for (const name of propertyNames) {
      total += calculateParameterTokens(name, properties[name], {
        encoder,
        constants,
      })
    }
  }

  return total
}

/**
 * Token cost of a single tool definition.
 *
 * The function-init overhead, plus the encoded "name:description" line
 * (trailing period stripped), plus the cost of the parameter schema when
 * one is present.
 */
const calculateToolTokens = (
  tool: Tool,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  const func = tool.function
  const rawDescription = func.description || ""
  // Trailing period is dropped to match the counting heuristic.
  const description =
    rawDescription.endsWith(".") ?
      rawDescription.slice(0, -1)
    : rawDescription
  let tokens =
    constants.funcInit + encoder.encode(`${func.name}:${description}`).length
  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  if (typeof func.parameters === "object" && func.parameters !== null) {
    tokens += calculateParametersTokens(func.parameters, encoder, constants)
  }
  return tokens
}

/**
 * Total token cost of a tool list: the sum of each tool's individual cost
 * plus a single function-end overhead for the whole list.
 */
export const numTokensForTools = (
  tools: Array<Tool>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number =>
  tools.reduce(
    (sum, tool) => sum + calculateToolTokens(tool, encoder, constants),
    constants.funcEnd,
  )

/**
* Calculate the token count of messages, supporting multiple GPT encoders
*/
export const getTokenCount = async (
payload: ChatCompletionsPayload,
model: Model,
): Promise<{ input: number; output: number }> => {
// Get tokenizer string
const tokenizer = getTokenizerFromModel(model)

// Get corresponding encoder module
const encoder = await getEncodeChatFunction(tokenizer)

const simplifiedMessages = payload.messages
const inputMessages = simplifiedMessages.filter(
(msg) => msg.role !== "assistant",
)
const outputMessages = simplifiedMessages.filter(
(msg) => msg.role === "assistant",
)

const constants = getModelConstants(model)
let inputTokens = calculateTokens(inputMessages, encoder, constants)
if (payload.tools && payload.tools.length > 0) {
inputTokens += numTokensForTools(payload.tools, encoder, constants)
}
const outputTokens = calculateTokens(outputMessages, encoder, constants)

return {
input: inputTokens,
Expand Down
Loading