|
1 |
| -import { countTokens } from "gpt-tokenizer/model/gpt-4o" |
| 1 | +import type { |
| 2 | + ChatCompletionsPayload, |
| 3 | + ContentPart, |
| 4 | + Message, |
| 5 | + Tool, |
| 6 | + ToolCall, |
| 7 | +} from "~/services/copilot/create-chat-completions" |
| 8 | +import type { Model } from "~/services/copilot/get-models" |
2 | 9 |
|
3 |
| -import type { Message } from "~/services/copilot/create-chat-completions" |
/**
 * Lazy loaders for the supported tokenizer encodings.
 *
 * Each key is an encoding name and each value is a thunk that dynamically
 * imports the matching `gpt-tokenizer/encoding/*` module, so an encoding is
 * only loaded when its thunk is actually invoked.
 */
const ENCODING_MAP = {
  o200k_base: () => import("gpt-tokenizer/encoding/o200k_base"),
  cl100k_base: () => import("gpt-tokenizer/encoding/cl100k_base"),
  p50k_base: () => import("gpt-tokenizer/encoding/p50k_base"),
  p50k_edit: () => import("gpt-tokenizer/encoding/p50k_edit"),
  r50k_base: () => import("gpt-tokenizer/encoding/r50k_base"),
} as const
4 | 18 |
|
5 |
| -export const getTokenCount = (messages: Array<Message>) => { |
6 |
| - const simplifiedMessages = messages.map((message) => { |
7 |
| - let content = "" |
8 |
| - if (typeof message.content === "string") { |
9 |
| - content = message.content |
10 |
| - } else if (Array.isArray(message.content)) { |
11 |
| - content = message.content |
12 |
| - .filter((part) => part.type === "text") |
13 |
| - .map((part) => (part as { text: string }).text) |
14 |
| - .join("") |
/** Names of the encodings that have a loader in `ENCODING_MAP`. */
type SupportedEncoding = keyof typeof ENCODING_MAP

/** Minimal surface this module needs from a loaded encoding module. */
interface Encoder {
  /** Convert text into its token ids; callers only use the resulting length. */
  encode: (text: string) => Array<number>
}

/**
 * Encoders that have already been loaded, keyed by the encoding name that was
 * requested, so repeated lookups do not re-import the module.
 */
const encodingCache = new Map<string, Encoder>()
| 28 | + |
/**
 * Estimate the token cost of a list of tool calls.
 *
 * Every call contributes `constants.funcInit` plus the encoded length of its
 * JSON serialization. `constants.funcEnd` is added exactly once for the list
 * as a whole (also when the list is empty).
 *
 * @param toolCalls - Tool calls attached to a message.
 * @param encoder - Encoder used to tokenize the serialized calls.
 * @param constants - Per-model overhead constants.
 * @returns The estimated number of tokens.
 */
const calculateToolCallsTokens = (
  toolCalls: Array<ToolCall>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number =>
  toolCalls.reduce(
    (sum, call) =>
      sum + constants.funcInit + encoder.encode(JSON.stringify(call)).length,
    constants.funcEnd,
  )
| 45 | + |
/**
 * Estimate the token cost of a multi-part message content array.
 *
 * An `image_url` part costs the encoded length of its URL plus a fixed
 * 85-token surcharge. Any other part costs the encoded length of its `text`,
 * and contributes nothing when `text` is empty or missing.
 *
 * @param contentParts - The parts of a message's array-valued content.
 * @param encoder - Encoder used to tokenize URLs and text.
 * @returns The estimated number of tokens.
 */
const calculateContentPartsTokens = (
  contentParts: Array<ContentPart>,
  encoder: Encoder,
): number => {
  const imageSurcharge = 85
  return contentParts.reduce((total, part) => {
    if (part.type === "image_url") {
      return total + encoder.encode(part.image_url.url).length + imageSurcharge
    }
    return part.text ? total + encoder.encode(part.text).length : total
  }, 0)
}
| 63 | + |
/**
 * Estimate the token cost of a single chat message.
 *
 * Starts from a fixed per-message overhead of 3 tokens, then walks every
 * field of the message:
 * - any string value adds its encoded length;
 * - a `name` field adds 1 extra token;
 * - an array-valued `tool_calls` field adds the cost of its tool calls;
 * - an array-valued `content` field adds the cost of its content parts.
 *
 * @param message - The chat message to measure.
 * @param encoder - Encoder used to tokenize string values.
 * @param constants - Per-model overhead constants (used for tool calls).
 * @returns The estimated number of tokens.
 */
const calculateMessageTokens = (
  message: Message,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  const tokensPerMessage = 3
  const tokensPerName = 1
  let tokens = tokensPerMessage
  for (const [key, value] of Object.entries(message)) {
    if (typeof value === "string") {
      tokens += encoder.encode(value).length
    }
    if (key === "name") {
      tokens += tokensPerName
    }
    // A `tool_calls` key can be present with a null/undefined value (it still
    // shows up in Object.entries); iterating that would throw, so only
    // real arrays are counted.
    if (key === "tool_calls" && Array.isArray(value)) {
      tokens += calculateToolCallsTokens(
        value as Array<ToolCall>,
        encoder,
        constants,
      )
    }
    if (key === "content" && Array.isArray(value)) {
      tokens += calculateContentPartsTokens(
        value as Array<ContentPart>,
        encoder,
      )
    }
  }
  return tokens
}
| 98 | + |
/**
 * Estimate the total token cost of a list of chat messages.
 *
 * An empty list costs 0. Otherwise the result is the sum of the per-message
 * costs plus 3 tokens, because every reply is primed with
 * <|start|>assistant<|message|>.
 *
 * @param messages - Messages to measure.
 * @param encoder - Encoder used to tokenize message fields.
 * @param constants - Per-model overhead constants.
 * @returns The estimated number of tokens.
 */
const calculateTokens = (
  messages: Array<Message>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  if (messages.length === 0) {
    return 0
  }
  const replyPrimingTokens = 3
  return messages.reduce(
    (total, message) =>
      total + calculateMessageTokens(message, encoder, constants),
    replyPrimingTokens,
  )
}
| 118 | + |
/**
 * Load (or fetch from cache) the encoder for an encoding name.
 *
 * Names that have no loader of their own in `ENCODING_MAP` fall back to
 * `o200k_base`. The loaded encoder is cached under the name that was
 * requested, so unknown names also hit the cache on later calls.
 *
 * @param encoding - Encoding name, e.g. as reported by a model's capabilities.
 * @returns The encoder module for that encoding (or the fallback).
 */
const getEncodeChatFunction = async (encoding: string): Promise<Encoder> => {
  const cached = encodingCache.get(encoding)
  if (cached) {
    return cached
  }

  // Own-property check on purpose: the `in` operator would also accept
  // inherited keys such as "constructor" or "toString" and then invoke an
  // Object.prototype member instead of an encoding loader.
  const resolved: SupportedEncoding =
    Object.prototype.hasOwnProperty.call(ENCODING_MAP, encoding) ?
      (encoding as SupportedEncoding)
    : "o200k_base"

  const encodingModule = (await ENCODING_MAP[resolved]()) as Encoder
  encodingCache.set(encoding, encodingModule)
  return encodingModule
}
| 141 | + |
/**
 * Resolve the tokenizer name for a model.
 *
 * @param model - Model whose `capabilities.tokenizer` is consulted.
 * @returns The model's tokenizer name, or "o200k_base" when it is missing or
 *   empty.
 */
export const getTokenizerFromModel = (model: Model): string => {
  const { tokenizer } = model.capabilities
  return tokenizer ? tokenizer : "o200k_base"
}
| 148 | + |
/**
 * Build the overhead constants used by the token estimators for a model.
 *
 * Only `funcInit` depends on the model: it is 10 for the ids "gpt-3.5-turbo"
 * and "gpt-4", and 7 for every other id. A fresh object is returned on each
 * call.
 *
 * @param model - Model whose `id` selects the constants.
 * @returns The constants object for that model.
 */
const getModelConstants = (model: Model) => {
  const isGpt35OrGpt4 = ["gpt-3.5-turbo", "gpt-4"].includes(model.id)
  return {
    funcInit: isGpt35OrGpt4 ? 10 : 7,
    propInit: 3,
    propKey: 3,
    enumInit: -3,
    enumItem: 3,
    funcEnd: 12,
  }
}
18 | 171 |
|
19 |
| - let inputMessages = simplifiedMessages.filter((message) => { |
20 |
| - return message.role !== "tool" |
21 |
| - }) |
22 |
| - let outputMessages: typeof simplifiedMessages = [] |
/**
 * Estimate the token cost of one parameter of a tool's parameter schema.
 *
 * The cost is built from:
 * - `constants.propKey` for the parameter itself (this is the whole cost when
 *   `prop` is not a non-null object);
 * - for an `enum` array: `constants.enumInit` once, then per item
 *   `constants.enumItem` plus the encoded length of the item's text;
 * - the encoded length of `name:type:description` (type defaults to "string",
 *   one trailing "." is stripped from the description);
 * - for every other property of the schema: the encoded length of
 *   `property:value`, with non-string values JSON-serialized.
 *
 * @param key - The parameter name.
 * @param prop - The parameter's schema; arbitrary client-supplied data.
 * @param context - The encoder and per-model constants to use.
 * @returns The estimated number of tokens.
 */
const calculateParameterTokens = (
  key: string,
  prop: unknown,
  context: {
    encoder: Encoder
    constants: ReturnType<typeof getModelConstants>
  },
): number => {
  const { encoder, constants } = context
  let tokens = constants.propKey

  // Early return if prop is not an object
  if (typeof prop !== "object" || prop === null) {
    return tokens
  }

  // The schema is untyped input; these are the fields read below.
  const param = prop as {
    type?: string
    description?: unknown
    enum?: Array<unknown>
    [key: string]: unknown
  }

  const paramType = param.type || "string"
  // Only a string description can be trimmed/encoded; anything else is
  // treated as absent instead of crashing on `.endsWith`.
  let paramDesc = typeof param.description === "string" ? param.description : ""

  // Handle enum values
  if (Array.isArray(param.enum)) {
    tokens += constants.enumInit
    for (const item of param.enum) {
      // Serialize non-string items as JSON (String() would turn objects into
      // "[object Object]"); fall back to String() for values JSON cannot
      // represent, where JSON.stringify yields undefined.
      const itemText =
        typeof item === "string" ? item : (
          ((JSON.stringify(item) as string | undefined) ?? String(item))
        )
      tokens += constants.enumItem
      tokens += encoder.encode(itemText).length
    }
  }

  // Clean up description
  if (paramDesc.endsWith(".")) {
    paramDesc = paramDesc.slice(0, -1)
  }

  // Encode the main parameter line
  const line = `${key}:${paramType}:${paramDesc}`
  tokens += encoder.encode(line).length

  // Handle additional properties (excluding standard ones)
  const excludedKeys = new Set(["type", "description", "enum"])
  for (const propertyName of Object.keys(param)) {
    if (!excludedKeys.has(propertyName)) {
      const propertyValue = param[propertyName]
      const propertyText =
        typeof propertyValue === "string" ? propertyValue : (
          JSON.stringify(propertyValue)
        )
      tokens += encoder.encode(`${propertyName}:${propertyText}`).length
    }
  }

  return tokens
}
| 236 | + |
/**
 * Estimate the token cost of a tool's `parameters` schema object.
 *
 * For the `properties` entry, a non-empty object costs `constants.propInit`
 * plus the cost of each contained parameter. Every other top-level entry
 * costs the encoded length of `key:value`, with non-string values
 * JSON-serialized.
 *
 * @param parameters - The schema; arbitrary client-supplied data. Anything
 *   that is falsy or not an object costs 0.
 * @param encoder - Encoder used to tokenize text.
 * @param constants - Per-model overhead constants.
 * @returns The estimated number of tokens.
 */
const calculateParametersTokens = (
  parameters: unknown,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  if (!parameters || typeof parameters !== "object") {
    return 0
  }

  const params = parameters as Record<string, unknown>
  let tokens = 0

  for (const [key, value] of Object.entries(params)) {
    if (key === "properties") {
      // `properties` may be null or a non-object in a malformed schema;
      // Object.keys(null) would throw, so such values contribute nothing.
      if (typeof value !== "object" || value === null) {
        continue
      }
      const properties = value as Record<string, unknown>
      const propertyNames = Object.keys(properties)
      if (propertyNames.length > 0) {
        tokens += constants.propInit
        for (const propKey of propertyNames) {
          tokens += calculateParameterTokens(propKey, properties[propKey], {
            encoder,
            constants,
          })
        }
      }
    } else {
      const paramText =
        typeof value === "string" ? value : JSON.stringify(value)
      tokens += encoder.encode(`${key}:${paramText}`).length
    }
  }

  return tokens
}
| 273 | + |
/**
 * Estimate the token cost of a single tool definition.
 *
 * The cost is `constants.funcInit`, plus the encoded length of
 * `name:description` (one trailing "." is stripped from the description, a
 * missing description counts as empty), plus the cost of the parameter
 * schema when `parameters` is a non-null object.
 *
 * @param tool - The tool definition to measure.
 * @param encoder - Encoder used to tokenize text.
 * @param constants - Per-model overhead constants.
 * @returns The estimated number of tokens.
 */
const calculateToolTokens = (
  tool: Tool,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number => {
  const { name, description, parameters } = tool.function
  const rawDescription = description || ""
  const cleanDescription =
    rawDescription.endsWith(".") ? rawDescription.slice(0, -1) : rawDescription
  const headerTokens = encoder.encode(`${name}:${cleanDescription}`).length

  // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
  const hasParameters = typeof parameters === "object" && parameters !== null
  const parameterTokens =
    hasParameters ? calculateParametersTokens(parameters, encoder, constants) : 0

  return constants.funcInit + headerTokens + parameterTokens
}
| 299 | + |
/**
 * Estimate the token cost of a list of tool definitions.
 *
 * Sums the cost of every tool and adds `constants.funcEnd` once for the list
 * as a whole (also when the list is empty).
 *
 * @param tools - Tool definitions sent with the request.
 * @param encoder - Encoder used to tokenize text.
 * @param constants - Per-model overhead constants.
 * @returns The estimated number of tokens.
 */
export const numTokensForTools = (
  tools: Array<Tool>,
  encoder: Encoder,
  constants: ReturnType<typeof getModelConstants>,
): number =>
  tools.reduce(
    (total, tool) => total + calculateToolTokens(tool, encoder, constants),
    constants.funcEnd,
  )
| 315 | + |
| 316 | +/** |
| 317 | + * Calculate the token count of messages, supporting multiple GPT encoders |
| 318 | + */ |
| 319 | +export const getTokenCount = async ( |
| 320 | + payload: ChatCompletionsPayload, |
| 321 | + model: Model, |
| 322 | +): Promise<{ input: number; output: number }> => { |
| 323 | + // Get tokenizer string |
| 324 | + const tokenizer = getTokenizerFromModel(model) |
| 325 | + |
| 326 | + // Get corresponding encoder module |
| 327 | + const encoder = await getEncodeChatFunction(tokenizer) |
| 328 | + |
| 329 | + const simplifiedMessages = payload.messages |
| 330 | + const inputMessages = simplifiedMessages.filter( |
| 331 | + (msg) => msg.role !== "assistant", |
| 332 | + ) |
| 333 | + const outputMessages = simplifiedMessages.filter( |
| 334 | + (msg) => msg.role === "assistant", |
| 335 | + ) |
| 336 | + |
| 337 | + const constants = getModelConstants(model) |
| 338 | + let inputTokens = calculateTokens(inputMessages, encoder, constants) |
| 339 | + if (payload.tools && payload.tools.length > 0) { |
| 340 | + inputTokens += numTokensForTools(payload.tools, encoder, constants) |
| 341 | + } |
| 342 | + const outputTokens = calculateTokens(outputMessages, encoder, constants) |
35 | 343 |
|
36 | 344 | return {
|
37 | 345 | input: inputTokens,
|
|
0 commit comments