diff --git a/extensions/positron-assistant/src/anthropic.ts b/extensions/positron-assistant/src/anthropic.ts index ff2947e2412a..cfd73123c0b4 100644 --- a/extensions/positron-assistant/src/anthropic.ts +++ b/extensions/positron-assistant/src/anthropic.ts @@ -8,7 +8,7 @@ import * as vscode from 'vscode'; import Anthropic from '@anthropic-ai/sdk'; import { ModelConfig } from './config'; import { isLanguageModelImagePart, LanguageModelImagePart } from './languageModelParts.js'; -import { isChatImagePart, processMessages } from './utils.js'; +import { isChatImagePart, isCacheBreakpointPart, parseCacheBreakpoint, processMessages } from './utils.js'; import { DEFAULT_MAX_TOKEN_OUTPUT } from './constants.js'; import { log } from './extension.js'; @@ -16,10 +16,17 @@ import { log } from './extension.js'; * Options for controlling cache behavior in the Anthropic language model. */ export interface CacheControlOptions { - /** Add a cache control point to the system prompt (default: true). */ + /** Add a cache breakpoint to the system prompt (default: true). */ system?: boolean; } +/** + * Block params that set cache breakpoints. + */ +type CacheControllableBlockParam = Anthropic.TextBlockParam | + Anthropic.ImageBlockParam | + Anthropic.ToolUseBlockParam | + Anthropic.ToolResultBlockParam; export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProvider { name: string; @@ -201,11 +208,10 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv }); } else { // For LanguageModelChatMessage, ensure it has non-empty message content - const processedMessages = processMessages([text]); - if (processedMessages.length === 0) { + messages.push(...toAnthropicMessages([text])); + if (messages.length === 0) { return 0; } - messages.push(...processedMessages.map(toAnthropicMessage)); } const result = await this._client.messages.countTokens({ model: this._config.model, @@ -224,28 +230,39 @@ export class AnthropicLanguageModel implements positron.ai.LanguageModelChatProv } function toAnthropicMessages(messages: vscode.LanguageModelChatMessage2[]): Anthropic.MessageParam[] { - const anthropicMessages = processMessages(messages).map(toAnthropicMessage); + let userMessageIndex = 0; + let assistantMessageIndex = 0; + const anthropicMessages = processMessages(messages).map((message) => { + const source = message.role === vscode.LanguageModelChatMessageRole.User ? 
+ `User message ${userMessageIndex++}` : + `Assistant message ${assistantMessageIndex++}`; + return toAnthropicMessage(message, source); + }); return anthropicMessages; } -function toAnthropicMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam { +function toAnthropicMessage(message: vscode.LanguageModelChatMessage2, source: string): Anthropic.MessageParam { switch (message.role) { case vscode.LanguageModelChatMessageRole.Assistant: - return toAnthropicAssistantMessage(message); + return toAnthropicAssistantMessage(message, source); case vscode.LanguageModelChatMessageRole.User: - return toAnthropicUserMessage(message); + return toAnthropicUserMessage(message, source); default: throw new Error(`Unsupported message role: ${message.role}`); } } -function toAnthropicAssistantMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam { +function toAnthropicAssistantMessage(message: vscode.LanguageModelChatMessage2, source: string): Anthropic.MessageParam { const content: Anthropic.ContentBlockParam[] = []; - for (const part of message.content) { + for (let i = 0; i < message.content.length; i++) { + const [part, nextPart] = [message.content[i], message.content[i + 1]]; + const dataPart = nextPart instanceof vscode.LanguageModelDataPart ? nextPart : undefined; if (part instanceof vscode.LanguageModelTextPart) { - content.push(toAnthropicTextBlock(part)); + content.push(toAnthropicTextBlock(part, source, dataPart)); } else if (part instanceof vscode.LanguageModelToolCallPart) { - content.push(toAnthropicToolUseBlock(part)); + content.push(toAnthropicToolUseBlock(part, source, dataPart)); + } else if (part instanceof vscode.LanguageModelDataPart) { + // Skip extra data parts. They're handled in part conversion. } else { throw new Error('Unsupported part type on assistant message'); } @@ -256,20 +273,22 @@ function toAnthropicAssistantMessage(message: vscode.LanguageModelChatMessage2): }; } -function toAnthropicUserMessage(message: vscode.LanguageModelChatMessage2): Anthropic.MessageParam { +function toAnthropicUserMessage(message: vscode.LanguageModelChatMessage2, source: string): Anthropic.MessageParam { const content: Anthropic.ContentBlockParam[] = []; - for (const part of message.content) { + for (let i = 0; i < message.content.length; i++) { + const [part, nextPart] = [message.content[i], message.content[i + 1]]; + const dataPart = nextPart instanceof vscode.LanguageModelDataPart ? nextPart : undefined; if (part instanceof vscode.LanguageModelTextPart) { - content.push(toAnthropicTextBlock(part)); + content.push(toAnthropicTextBlock(part, source, dataPart)); } else if (part instanceof vscode.LanguageModelToolResultPart) { - content.push(toAnthropicToolResultBlock(part)); + content.push(toAnthropicToolResultBlock(part, source, dataPart)); } else if (part instanceof vscode.LanguageModelToolResultPart2) { - content.push(toAnthropicToolResultBlock(part)); + content.push(toAnthropicToolResultBlock(part, source, dataPart)); } else if (part instanceof vscode.LanguageModelDataPart) { if (isChatImagePart(part)) { - content.push(chatImagePartToAnthropicImageBlock(part)); + content.push(chatImagePartToAnthropicImageBlock(part, source, dataPart)); } else { - throw new Error('Unsupported language model data part type on user message'); + // Skip other data parts. 
} } else { throw new Error('Unsupported part type on user message'); @@ -281,62 +300,106 @@ function toAnthropicUserMessage(message: vscode.LanguageModelChatMessage2): Anth }; } -function toAnthropicTextBlock(part: vscode.LanguageModelTextPart): Anthropic.TextBlockParam { - return { - type: 'text', - text: part.value, - }; +function toAnthropicTextBlock( + part: vscode.LanguageModelTextPart, + source: string, + dataPart?: vscode.LanguageModelDataPart, +): Anthropic.TextBlockParam { + return withCacheControl( + { + type: 'text', + text: part.value, + }, + source, + dataPart, + ); } -function toAnthropicToolUseBlock(part: vscode.LanguageModelToolCallPart): Anthropic.ToolUseBlockParam { - return { - type: 'tool_use', - id: part.callId, - name: part.name, - input: part.input, - }; +function toAnthropicToolUseBlock( + part: vscode.LanguageModelToolCallPart, + source: string, + dataPart?: vscode.LanguageModelDataPart, +): Anthropic.ToolUseBlockParam { + return withCacheControl( + { + type: 'tool_use', + id: part.callId, + name: part.name, + input: part.input, + }, + source, + dataPart, + ); } -function toAnthropicToolResultBlock(part: vscode.LanguageModelToolResultPart): Anthropic.ToolResultBlockParam { +function toAnthropicToolResultBlock( + part: vscode.LanguageModelToolResultPart, + source: string, + dataPart?: vscode.LanguageModelDataPart, +): Anthropic.ToolResultBlockParam { const content: Anthropic.ToolResultBlockParam['content'] = []; - for (const resultPart of part.content) { + for (let i = 0; i < part.content.length; i++) { + const [resultPart, resultNextPart] = [part.content[i], part.content[i + 1]]; + const resultDataPart = resultNextPart instanceof vscode.LanguageModelDataPart ? resultNextPart : undefined; if (resultPart instanceof vscode.LanguageModelTextPart) { - content.push(toAnthropicTextBlock(resultPart)); + content.push(toAnthropicTextBlock(resultPart, source, resultDataPart)); } else if (isLanguageModelImagePart(resultPart)) { - content.push(languageModelImagePartToAnthropicImageBlock(resultPart)); + content.push(languageModelImagePartToAnthropicImageBlock(resultPart, source, resultDataPart)); + } else if (resultPart instanceof vscode.LanguageModelDataPart) { + // Skip data parts. } else { throw new Error('Unsupported part type on tool result part content'); } } - return { - type: 'tool_result', - tool_use_id: part.callId, - content, - }; + return withCacheControl( + { + type: 'tool_result', + tool_use_id: part.callId, + content, + }, + source, + dataPart, + ); } -function chatImagePartToAnthropicImageBlock(part: vscode.LanguageModelDataPart): Anthropic.ImageBlockParam { - return { - type: 'image', - source: { - type: 'base64', - // We may pass an unsupported mime type; let Anthropic throw the error. - media_type: part.mimeType as Anthropic.Base64ImageSource['media_type'], - data: Buffer.from(part.data).toString('base64'), +function chatImagePartToAnthropicImageBlock( + part: vscode.LanguageModelDataPart, + source: string, + dataPart?: vscode.LanguageModelDataPart, +): Anthropic.ImageBlockParam { + return withCacheControl( + { + type: 'image', + source: { + type: 'base64', + // We may pass an unsupported mime type; let Anthropic throw the error. 
+ media_type: part.mimeType as Anthropic.Base64ImageSource['media_type'], + data: Buffer.from(part.data).toString('base64'), + }, }, - }; + source, + dataPart, + ); } -function languageModelImagePartToAnthropicImageBlock(part: LanguageModelImagePart): Anthropic.ImageBlockParam { - return { - type: 'image', - source: { - type: 'base64', - // We may pass an unsupported mime type; let Anthropic throw the error. - media_type: part.value.mimeType as Anthropic.Base64ImageSource['media_type'], - data: part.value.base64, +function languageModelImagePartToAnthropicImageBlock( + part: LanguageModelImagePart, + source: string, + dataPart?: vscode.LanguageModelDataPart, +): Anthropic.ImageBlockParam { + return withCacheControl( + { + type: 'image', + source: { + type: 'base64', + // We may pass an unsupported mime type; let Anthropic throw the error. + media_type: part.value.mimeType as Anthropic.Base64ImageSource['media_type'], + data: part.value.base64, + }, }, - }; + source, + dataPart, + ); } function toAnthropicTools(tools: vscode.LanguageModelChatTool[]): Anthropic.ToolUnion[] { @@ -387,17 +450,30 @@ function toAnthropicSystem(system: unknown, cacheSystem = true): Anthropic.Messa }]; if (cacheSystem) { - // Add a cache control point to the last system prompt block. + // Add a cache breakpoint to the last system prompt block. const lastSystemBlock = anthropicSystem[anthropicSystem.length - 1]; lastSystemBlock.cache_control = { type: 'ephemeral' }; - log.debug(`[anthropic] Adding cache control point to system prompt`); + log.debug(`[anthropic] Adding cache breakpoint to system prompt`); } return anthropicSystem; } - // Pass the system prompt through as-is. - // We may pass an invalid system prompt; let Anthropic throw the error. - return system as Anthropic.MessageCreateParams['system']; + + // Check if it's an array of parts. + if (Array.isArray(system) && system.every(part => (part instanceof vscode.LanguageModelTextPart) || + (part instanceof vscode.LanguageModelDataPart))) { + const anthropicSystem: Anthropic.MessageCreateParams['system'] = []; + for (let i = 0; i < system.length; i++) { + const [part, nextPart] = [system[i], system[i + 1]]; + const dataPart = nextPart instanceof vscode.LanguageModelDataPart ? nextPart : undefined; + if (part instanceof vscode.LanguageModelTextPart) { + anthropicSystem.push(toAnthropicTextBlock(part, 'System prompt', dataPart)); + } + } + return anthropicSystem; + } + + throw new Error(`Unexpected system prompt value`); } function isCacheControlOptions(options: unknown): options is CacheControlOptions { @@ -407,3 +483,25 @@ function isCacheControlOptions(options: unknown): options is CacheControlOptions const cacheControlOptions = options as CacheControlOptions; return cacheControlOptions.system === undefined || typeof cacheControlOptions.system === 'boolean'; } + +function withCacheControl<T extends CacheControllableBlockParam>( + part: T, + source: string, + dataPart: vscode.LanguageModelDataPart | undefined, +): T { + if (!isCacheBreakpointPart(dataPart)) { + return part; + } + + try { + const cacheBreakpoint = parseCacheBreakpoint(dataPart); + log.debug(`[anthropic] Adding cache breakpoint to ${part.type} part. 
Source: ${source}`); + return { + ...part, + cache_control: cacheBreakpoint, + }; + } catch (error) { + log.error(`[anthropic] Failed to parse cache breakpoint: ${error}`); + return part; + } +} diff --git a/extensions/positron-assistant/src/models.ts b/extensions/positron-assistant/src/models.ts index 4689a9be283a..87d3a4c8e24f 100644 --- a/extensions/positron-assistant/src/models.ts +++ b/extensions/positron-assistant/src/models.ts @@ -102,7 +102,7 @@ class EchoLanguageModel implements positron.ai.LanguageModelChatProvider { token: vscode.CancellationToken ): Promise { const _messages = toAIMessage(messages); - const message = _messages[_messages.length - 1]; + const message = _messages[0]; if (typeof message.content === 'string') { message.content = [{ type: 'text', text: message.content }]; diff --git a/extensions/positron-assistant/src/participants.ts b/extensions/positron-assistant/src/participants.ts index b9d635ea1622..e8293a1a2825 100644 --- a/extensions/positron-assistant/src/participants.ts +++ b/extensions/positron-assistant/src/participants.ts @@ -10,7 +10,7 @@ import * as fs from 'fs'; import * as xml from './xml.js'; import { MARKDOWN_DIR, TOOL_TAG_REQUIRES_WORKSPACE } from './constants'; -import { isChatImageMimeType, isTextEditRequest, isWorkspaceOpen, toLanguageModelChatMessage, uriToString } from './utils'; +import { isChatImageMimeType, isTextEditRequest, isWorkspaceOpen, languageModelCacheBreakpointPart, toLanguageModelChatMessage, uriToString } from './utils'; import { quartoHandler } from './commands/quarto'; import { PositronAssistantToolName } from './types.js'; import { StreamingTagLexer } from './streamingTagLexer.js'; @@ -266,15 +266,23 @@ abstract class PositronAssistantParticipant implements IPositronAssistantPartici // Note that context.history excludes tool calls and results. const messages = toLanguageModelChatMessage(context.history); + // Add the user's prompt. + const userPromptPart = new vscode.LanguageModelTextPart(request.prompt); + messages.push(vscode.LanguageModelChatMessage.User([userPromptPart])); + + // Add cache breakpoints to at most the last 2 user messages. + addCacheControlBreakpointPartsToLastUserMessages(messages, 2); + // Add a user message containing context about the request, workspace, running sessions, etc. + // NOTE: We add the context message after the user prompt so that the context message is + // not cached. Since the context message is transiently added to each request, caching it + // will write a prompt prefix to the cache that will never be read. We will want to keep + // an eye on whether the order of user prompt and context message affects model responses. const contextMessage = await this.getContextMessage(request, response, positronContext); if (contextMessage) { messages.push(contextMessage); } - // Add the user's prompt. - messages.push(vscode.LanguageModelChatMessage.User(request.prompt)); - // Send the request to the language model. await this.sendLanguageModelRequest(request, response, token, messages, tools, system); @@ -841,3 +849,35 @@ export interface TextProcessor { /** Process any unhandled text at the end of the stream. */ flush(): void | Promise<void>; } + +/** + * Add cache breakpoints (for Anthropic prompt caching) to the last few user messages. + * + * @param messages The chat messages to modify. + * @param maxCacheBreakpointParts The maximum number of cache breakpoints to add. + * Note that Anthropic supports a maximum of 4 cache controls per request and that + * we may also cache tools and the system prompt. 
+ */ +function addCacheControlBreakpointPartsToLastUserMessages( + messages: vscode.LanguageModelChatMessage2[], + maxCacheBreakpointParts: number, +) { + let numCacheControlParts = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const message = messages[i]; + if (message.role !== vscode.LanguageModelChatMessageRole.User) { + continue; + } + const lastPart = message.content.at(-1); + if (!lastPart) { + continue; + } + log.debug(`[participant] Adding cache breakpoint to user message part: ${lastPart.constructor.name}`); + message.content.push(languageModelCacheBreakpointPart()); + numCacheControlParts++; + if (numCacheControlParts >= maxCacheBreakpointParts) { + // We only want to cache the last two user messages. + break; + } + } +} diff --git a/extensions/positron-assistant/src/test/anthropic.test.ts b/extensions/positron-assistant/src/test/anthropic.test.ts index a7dc308c2c8b..160df38509f9 100644 --- a/extensions/positron-assistant/src/test/anthropic.test.ts +++ b/extensions/positron-assistant/src/test/anthropic.test.ts @@ -9,7 +9,7 @@ import * as vscode from 'vscode'; import * as sinon from 'sinon'; import { AnthropicLanguageModel, CacheControlOptions } from '../anthropic'; import { ModelConfig } from '../config'; -import { EMPTY_TOOL_RESULT_PLACEHOLDER } from '../utils.js'; +import { EMPTY_TOOL_RESULT_PLACEHOLDER, languageModelCacheBreakpointPart } from '../utils.js'; import Anthropic from '@anthropic-ai/sdk'; import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream.js'; import { mock } from './utils.js'; @@ -37,8 +37,8 @@ type ChatMessageValidateInfo = { suite('AnthropicLanguageModel', () => { let model: AnthropicLanguageModel; let mockClient: MockAnthropicClient; - let mockProgress: vscode.Progress; - let mockCancellationToken: vscode.CancellationToken; + let progress: vscode.Progress; + let cancellationToken: vscode.CancellationToken; setup(() => { // Create a mock Anthropic client @@ -58,19 +58,37 @@ suite('AnthropicLanguageModel', () => { model = new AnthropicLanguageModel(config, mockClient as unknown as Anthropic); // Create mock progress - mockProgress = { + progress = { report: sinon.stub() }; // Create a cancellation token const cancellationTokenSource = new vscode.CancellationTokenSource(); - mockCancellationToken = cancellationTokenSource.token; + cancellationToken = cancellationTokenSource.token; }); teardown(() => { sinon.restore(); }); + /** Send the request to the model and return the internal request made to the Anthropic API client. */ + async function provideLanguageModelResponse( + messages: vscode.LanguageModelChatMessage2[], + options: vscode.LanguageModelChatRequestOptions = {}, + ) { + await model.provideLanguageModelResponse( + messages, + options, + 'test-extension', + progress, + cancellationToken + ); + + sinon.assert.calledOnce(mockClient.messages.stream); + const body = mockClient.messages.stream.getCall(0).args[0]; + return { body }; + } + /** * Test the filtering behavior by checking the messages passed to Anthropic * when a message with empty LanguageModelTextPart content is included. 
@@ -115,20 +133,10 @@ suite('AnthropicLanguageModel', () => { const numOfMessagesToKeep = messagesWithVariousContent.filter(m => m.keep).length; // Call the method under test - await model.provideLanguageModelResponse( - messages, - {}, - 'test-extension', - mockProgress, - mockCancellationToken - ); - - // Check that messages were filtered correctly - const streamCall = mockClient.messages.stream.getCall(0); - assert.ok(streamCall, 'Stream method was not called'); + const { body } = await provideLanguageModelResponse(messages); // We expect two messages with non-empty content to be passed to the Anthropic client - const messagesPassedToAnthropicClient = streamCall.args[0].messages; + const messagesPassedToAnthropicClient = body.messages; assert.strictEqual(messagesPassedToAnthropicClient.length, numOfMessagesToKeep, 'Only non-empty messages should be passed to the Anthropic client'); // Verify each passed message has the non-empty content we expect @@ -227,18 +235,9 @@ suite('AnthropicLanguageModel', () => { test(`${testCase.testName}`, async () => { const messages = [testCase.message]; - await model.provideLanguageModelResponse( - messages, - {}, - 'test-extension', - mockProgress, - mockCancellationToken - ); - - const streamCall = mockClient.messages.stream.getCall(0); - assert.ok(streamCall, 'Stream method was not called'); + const { body } = await provideLanguageModelResponse(messages); - const messagesPassedToAnthropicClient = streamCall.args[0].messages; + const messagesPassedToAnthropicClient = body.messages; assert.strictEqual(messagesPassedToAnthropicClient.length, 1, 'Exactly one message should be passed to the Anthropic client'); assert.ok(typeof messagesPassedToAnthropicClient[0].content !== 'string', 'Expected a content block object, got a string'); @@ -247,125 +246,267 @@ suite('AnthropicLanguageModel', () => { }); }); - test('provideLanguageModelResponse cache_control default behavior', async () => { - const toolA = { - name: 'toolA', - description: 'Tool A', - inputSchema: { type: 'object' as const, properties: {} } - } satisfies vscode.LanguageModelChatTool; - const toolB = { - name: 'toolB', - description: 'Tool B', - inputSchema: { type: 'object' as const, properties: {} } - } satisfies vscode.LanguageModelChatTool; - const system = 'System prompt'; - - // Call the method under test. - await model.provideLanguageModelResponse( - [ - vscode.LanguageModelChatMessage.User('Hi'), - vscode.LanguageModelChatMessage.User('Bye'), - ], - { - // Define the request tools, not sorted by name, so we can test sorting behavior. - tools: [toolB, toolA], - modelOptions: { system }, - }, - 'test-extension', - mockProgress, - mockCancellationToken - ); + suite('provideLanguageModelResponse cache_control behavior', () => { + test('caches system prompt by default', async () => { + const toolA = { + name: 'toolA', + description: 'Tool A', + inputSchema: { type: 'object' as const, properties: {} } + } satisfies vscode.LanguageModelChatTool; + const toolB = { + name: 'toolB', + description: 'Tool B', + inputSchema: { type: 'object' as const, properties: {} } + } satisfies vscode.LanguageModelChatTool; + const system = 'System prompt'; + + // Call the method under test. + const { body } = await provideLanguageModelResponse( + [ + vscode.LanguageModelChatMessage.User('Hi'), + vscode.LanguageModelChatMessage.User('Bye'), + ], + { + // Define the request tools, not sorted by name, so we can test sorting behavior. 
+ tools: [toolB, toolA], + modelOptions: { system }, + }, + ); - sinon.assert.calledOnce(mockClient.messages.stream); - const body = mockClient.messages.stream.getCall(0).args[0]; + assert.deepStrictEqual(body.tools, [ + { + name: toolA.name, + description: toolA.description, + input_schema: toolA.inputSchema, + }, + { + name: toolB.name, + description: toolB.description, + input_schema: toolB.inputSchema, + }, + ] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body'); - assert.deepStrictEqual(body.tools, [ - { - name: toolA.name, - description: toolA.description, - input_schema: toolA.inputSchema, - }, - { - name: toolB.name, - description: toolB.description, - input_schema: toolB.inputSchema, - }, - ] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body'); + assert.deepStrictEqual(body.system, [ + { + type: 'text', + text: system, + cache_control: { type: 'ephemeral' }, + }, + ] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body'); - assert.deepStrictEqual(body.system, [ - { - type: 'text', - text: system, - cache_control: { type: 'ephemeral' }, - }, - ] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body'); + assert.deepStrictEqual(body.messages, [ + { role: 'user', content: [{ type: 'text', text: 'Hi' }] }, + { role: 'user', content: [{ type: 'text', text: 'Bye' }] }, + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); - assert.deepStrictEqual(body.messages, [ - { role: 'user', content: [{ type: 'text', text: 'Hi' }] }, - { role: 'user', content: [{ type: 'text', text: 'Bye' }] }, - ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); - }); + test('does not cache system prompt when disabled', async () => { + const toolA = { + name: 'toolA', + description: 'Tool A', + inputSchema: { type: 'object' as const, properties: {} } + } satisfies vscode.LanguageModelChatTool; + const toolB = { + name: 'toolB', + description: 'Tool B', + inputSchema: { type: 'object' as const, properties: {} } + } satisfies vscode.LanguageModelChatTool; + const system = 'System prompt'; + + // Call the method under test with no cacheControl options to test default behavior. + const { body } = await provideLanguageModelResponse( + [ + vscode.LanguageModelChatMessage.User('Hi'), + vscode.LanguageModelChatMessage.User('Bye'), + ], + { + // Define the request tools, not sorted by name, so we can test sorting behavior. + tools: [toolB, toolA], + modelOptions: { + system, + cacheControl: { + system: false, + } satisfies CacheControlOptions, + }, + }, + ); - test('provideLanguageModelResponse cache_control all disabled', async () => { - const toolA = { - name: 'toolA', - description: 'Tool A', - inputSchema: { type: 'object' as const, properties: {} } - } satisfies vscode.LanguageModelChatTool; - const toolB = { - name: 'toolB', - description: 'Tool B', - inputSchema: { type: 'object' as const, properties: {} } - } satisfies vscode.LanguageModelChatTool; - const system = 'System prompt'; - - // Call the method under test with no cacheControl options to test default behavior. - await model.provideLanguageModelResponse( - [ - vscode.LanguageModelChatMessage.User('Hi'), - vscode.LanguageModelChatMessage.User('Bye'), - ], - { - // Define the request tools, not sorted by name, so we can test sorting behavior. 
- tools: [toolB, toolA], - modelOptions: { - system, - cacheControl: { - system: false, - } satisfies CacheControlOptions, + assert.deepStrictEqual(body.tools, [ + { + name: toolA.name, + description: toolA.description, + input_schema: toolA.inputSchema, }, - }, - 'test-extension', - mockProgress, - mockCancellationToken - ); + { + name: toolB.name, + description: toolB.description, + input_schema: toolB.inputSchema, + }, + ] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body'); - sinon.assert.calledOnce(mockClient.messages.stream); - const body = mockClient.messages.stream.getCall(0).args[0]; + assert.deepStrictEqual(body.system, [ + { + type: 'text', + text: system, + }, + ] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body'); - assert.deepStrictEqual(body.tools, [ - { - name: toolA.name, - description: toolA.description, - input_schema: toolA.inputSchema, - }, - { - name: toolB.name, - description: toolB.description, - input_schema: toolB.inputSchema, - }, - ] satisfies Anthropic.ToolUnion[], 'Unexpected tools in request body'); + assert.deepStrictEqual(body.messages, [ + { role: 'user', content: [{ type: 'text', text: 'Hi' }] }, + { role: 'user', content: [{ type: 'text', text: 'Bye' }] }, + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); - assert.deepStrictEqual(body.system, [ - { - type: 'text', - text: system, - }, - ] satisfies Anthropic.TextBlockParam[], 'Unexpected system prompt in request body'); + test('applies cache_control to previous text part in same message', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + new vscode.LanguageModelTextPart('Hello world'), + languageModelCacheBreakpointPart(), + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Hello world', + cache_control: { type: 'ephemeral' } + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); - assert.deepStrictEqual(body.messages, [ - { role: 'user', content: [{ type: 'text', text: 'Hi' }] }, - { role: 'user', content: [{ type: 'text', text: 'Bye' }] }, - ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + test('applies cache_control to previous tool call part in same message', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.Assistant([ + new vscode.LanguageModelToolCallPart('call-1', 'test-tool', { input: 'test' }), + languageModelCacheBreakpointPart(), + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'assistant', + content: [ + { + type: 'tool_use', + id: 'call-1', + name: 'test-tool', + input: { input: 'test' }, + cache_control: { type: 'ephemeral' } + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); + + test('applies cache_control to previous tool result part in same message', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + new vscode.LanguageModelToolResultPart('call-1', [new vscode.LanguageModelTextPart('result')]), + languageModelCacheBreakpointPart() + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'call-1', + content: [{ type: 'text', text: 'result' }], + cache_control: { type: 
'ephemeral' } + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); + + test('ignores cache_control when there is no previous part', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + (languageModelCacheBreakpointPart()), + ]) + ]); + + assert.deepStrictEqual(body.messages, [] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); + + test('applies multiple cache_control parts to respective previous parts', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + new vscode.LanguageModelTextPart('First part'), + languageModelCacheBreakpointPart(), + new vscode.LanguageModelTextPart('Second part'), + languageModelCacheBreakpointPart(), + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'First part', + cache_control: { type: 'ephemeral' } + }, + { + type: 'text', + text: 'Second part', + cache_control: { type: 'ephemeral' } + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); + + test('ignores non-cache_control LanguageModelDataPart', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + new vscode.LanguageModelTextPart('Hello world'), + vscode.LanguageModelDataPart.json({ data: 'value' }) + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Hello world' + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); + + test('cache_control part applies to most recent valid content part', async () => { + const { body } = await provideLanguageModelResponse([ + vscode.LanguageModelChatMessage2.User([ + new vscode.LanguageModelTextPart('Hello world'), + new vscode.LanguageModelTextPart(''), + languageModelCacheBreakpointPart(), + ]) + ]); + + assert.deepStrictEqual(body.messages, [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'Hello world', + cache_control: { type: 'ephemeral' } + } + ] + } + ] satisfies Anthropic.MessageCreateParams['messages'], 'Unexpected user messages in request body'); + }); }); }); diff --git a/extensions/positron-assistant/src/test/participants.test.ts b/extensions/positron-assistant/src/test/participants.test.ts index 6c8f2aac63f2..dd5c138b0483 100644 --- a/extensions/positron-assistant/src/test/participants.test.ts +++ b/extensions/positron-assistant/src/test/participants.test.ts @@ -167,7 +167,7 @@ suite('PositronAssistantParticipant', () => { const [messages,] = sendRequestSpy.getCall(0).args; const c = positronChatContext; assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` @@ -224,7 +224,7 @@ Today's date is: Wednesday 11 June 2025 at 13:30:00 BST const filePath = vscode.workspace.asRelativePath(fileReferenceUri); const attachmentsText = await readFile(path.join(MARKDOWN_DIR, 'prompts', 'chat', 'attachments.md'), 'utf8'); assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${attachmentsText} @@ -255,7 +255,7 @@ 
${document.getText()} const filePath = vscode.workspace.asRelativePath(folderReferenceUri); const attachmentsText = await readFile(path.join(MARKDOWN_DIR, 'prompts', 'chat', 'attachments.md'), 'utf8'); assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${attachmentsText} @@ -290,7 +290,7 @@ subfolder/ const filePath = vscode.workspace.asRelativePath(fileReferenceUri); const attachmentsText = await readFile(path.join(MARKDOWN_DIR, 'prompts', 'chat', 'attachments.md'), 'utf8'); assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${attachmentsText} @@ -326,7 +326,7 @@ ${document.getText()} const [messages,] = sendRequestSpy.getCall(0).args; const attachmentsText = await readFile(path.join(MARKDOWN_DIR, 'prompts', 'chat', 'attachments.md'), 'utf8'); assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${attachmentsText} @@ -357,7 +357,7 @@ It should be included in the chat message.`; sinon.assert.calledOnce(sendRequestSpy); const [messages,] = sendRequestSpy.getCall(0).args; assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${llmsTxtContent} `); @@ -386,7 +386,7 @@ ${llmsTxtContent} const [messages,] = sendRequestSpy.getCall(0).args; assert.strictEqual(messages.length, DEFAULT_EXPECTED_MESSAGE_COUNT, `Unexpected messages: ${JSON.stringify(messages)}`); const filePath = vscode.workspace.asRelativePath(fileReferenceUri); - assertContextMessage(messages[0], + assertContextMessage(messages.at(-1)!, ` ${document.getText()} diff --git a/extensions/positron-assistant/src/types.ts b/extensions/positron-assistant/src/types.ts index 9e7eac27645a..854cce38938a 100644 --- a/extensions/positron-assistant/src/types.ts +++ b/extensions/positron-assistant/src/types.ts @@ -16,3 +16,37 @@ export enum PositronAssistantToolName { TextSearch = 'positron_findTextInProject_internal', FileContents = 'positron_getFileContents_internal', } + +/** + * Custom LanguageModelDataPart mime types. + */ +export enum LanguageModelDataPartMimeType { + /** + * Defines a cache breakpoint (e.g. for Anthropic's manual prompt caching). + * + * By matching the Copilot extension, other extensions that use models from either Copilot + * or Positron Assistant can set cache breakpoints with the same mime type. + * See: https://github.com/microsoft/vscode-copilot-chat/blob/6aeac371813be9037e74395186ec5b5b94089245/src/platform/endpoint/common/endpointTypes.ts#L7 + */ + CacheControl = 'cache_control', +} + +/** + * The type of cache breakpoint. + */ +export enum LanguageModelCacheBreakpointType { + /** + * Defines a short-lived cache. + */ + Ephemeral = 'ephemeral', +} + +/** + * Represents a cache breakpoint in a LanguageModelDataPart. + */ +export interface LanguageModelCacheBreakpoint { + /** + * The type of cache breakpoint. 
+ */ + type: LanguageModelCacheBreakpointType; +} diff --git a/extensions/positron-assistant/src/utils.ts b/extensions/positron-assistant/src/utils.ts index 6ac7f1fad139..5b8655df9b84 100644 --- a/extensions/positron-assistant/src/utils.ts +++ b/extensions/positron-assistant/src/utils.ts @@ -5,8 +5,9 @@ import * as vscode from 'vscode'; import * as ai from 'ai'; -import { PositronAssistantToolName } from './types.js'; +import { LanguageModelCacheBreakpoint, LanguageModelCacheBreakpointType, LanguageModelDataPartMimeType, PositronAssistantToolName } from './types.js'; import { isLanguageModelImagePart } from './languageModelParts.js'; +import { log } from './extension.js'; /** * Convert messages from VSCode Language Model format to Vercel AI format. @@ -83,35 +84,38 @@ export function toAIMessage( } } else if (message.role === vscode.LanguageModelChatMessageRole.Assistant) { + const content: ai.AssistantContent = []; + for (const part of message.content) { + if (part instanceof vscode.LanguageModelTextPart) { + content.push({ type: 'text', text: part.value }); + } else if (part instanceof vscode.LanguageModelToolCallPart) { + if ( + !toolResultExperimentalContent && + part.name === PositronAssistantToolName.GetPlot + ) { + // Vercel AI does not yet support image tool results, + // so replace getPlot tool calls with text asking for the plot. + // The corresponding tool result will be replaced with a user + // message containing the plot image. + content.push({ + type: 'text', + text: 'Please provide the current active plot.' + }); + } + content.push({ + type: 'tool-call', + toolCallId: part.callId, + toolName: part.name, + args: part.input, + }); + } else { + // Skip unknown parts. + log.warn(`[vercel] Skipping unsupported part type in assistant message: ${part.constructor.name}`); + } + } aiMessages.push({ role: 'assistant', - content: message.content.map((part) => { - if (part instanceof vscode.LanguageModelTextPart) { - return { type: 'text', text: part.value }; - } else if (part instanceof vscode.LanguageModelToolCallPart) { - if ( - !toolResultExperimentalContent && - part.name === PositronAssistantToolName.GetPlot - ) { - // Vercel AI does not yet support image tool results, - // so replace getPlot tool calls with text asking for the plot. - // The corresponding tool result will be replaced with a user - // message containing the plot image. - return { - type: 'text', - text: 'Please provide the current active plot.' - }; - } - return { - type: 'tool-call', - toolCallId: part.callId, - toolName: part.name, - args: part.input, - }; - } else { - throw new Error(`Unsupported part type on assistant message`); - } - }), + content, }); } } @@ -220,7 +224,7 @@ function getPlotToolResultToAiMessage(part: vscode.LanguageModelToolResultPart): /** * Convert chat participant history into an array of VSCode language model messages. 
*/ -export function toLanguageModelChatMessage(turns: vscode.ChatContext['history']): (vscode.LanguageModelChatMessage | vscode.LanguageModelChatMessage2)[] { +export function toLanguageModelChatMessage(turns: vscode.ChatContext['history']): vscode.LanguageModelChatMessage2[] { return turns.map((turn) => { if (turn instanceof vscode.ChatRequestTurn) { let textValue = turn.prompt; @@ -331,7 +335,9 @@ function removeEmptyTextParts(message: vscode.LanguageModelChatMessage2) { function hasContent(message: vscode.LanguageModelChatMessage2) { return message.content.length > 0 && !message.content.every( - part => part instanceof vscode.LanguageModelTextPart && part.value.trim() === '' + part => (part instanceof vscode.LanguageModelTextPart && part.value.trim() === '') || + // If the only other parts are cache breakpoints, consider the message to have no content. + isCacheBreakpointPart(part) ); } @@ -393,3 +399,46 @@ export function isWorkspaceOpen(): boolean { const workspaceFolders = vscode.workspace.workspaceFolders; return !!workspaceFolders && workspaceFolders.length > 0; } + +/** + * Checks if a given language model part defines a cache breakpoint. + */ +export function isCacheBreakpointPart(part: unknown): part is vscode.LanguageModelDataPart & { mimeType: LanguageModelDataPartMimeType.CacheControl } { + return part instanceof vscode.LanguageModelDataPart && + part.mimeType === LanguageModelDataPartMimeType.CacheControl; +} + +/** + * Parses a LanguageModelDataPart representing a cache breakpoint. + * + * @param part The LanguageModelDataPart to parse. + * @returns The parsed cache breakpoint. + * @throws Will throw an error if the part's mimeType is not JSON, if the JSON parsing fails, + * or if the parsed data does not match the expected schema. + */ +export function parseCacheBreakpoint(part: vscode.LanguageModelDataPart): LanguageModelCacheBreakpoint { + if (part.mimeType !== LanguageModelDataPartMimeType.CacheControl) { + throw new Error(`Expected LanguageModelDataPart with mimeType ${LanguageModelDataPartMimeType.CacheControl}, but got ${part.mimeType}`); + } + + // By matching the Copilot extension, other extensions that use models from either Copilot + // or Positron Assistant can set cache breakpoints with the same schema. + // See: https://github.com/microsoft/vscode-copilot-chat/blob/6aeac371813be9037e74395186ec5b5b94089245/src/extension/byok/vscode-node/anthropicMessageConverter.ts#L22 + const type = part.data.toString(); + if (!(type === LanguageModelCacheBreakpointType.Ephemeral)) { + throw new Error(`Expected LanguageModelDataPart to contain a LanguageModelCacheBreakpoint, but got: ${type}`); + } + + return { type }; +} + +/** + * Create a language model part that represents a cache control point. + * @returns A language model part representing the cache control point. + */ +export function languageModelCacheBreakpointPart(): vscode.LanguageModelDataPart { + // By matching the Copilot extension, other extensions that use models from either Copilot + // or Positron Assistant can set cache breakpoints with the same schema. 
+ // See: https://github.com/microsoft/vscode-copilot-chat/blob/6aeac371813be9037e74395186ec5b5b94089245/src/extension/byok/vscode-node/anthropicMessageConverter.ts#L22 + return vscode.LanguageModelDataPart.text(LanguageModelCacheBreakpointType.Ephemeral, LanguageModelDataPartMimeType.CacheControl); +} diff --git a/src/vs/base/common/marshallingIds.ts b/src/vs/base/common/marshallingIds.ts index 9ea80910eeb7..272aa3594e5d 100644 --- a/src/vs/base/common/marshallingIds.ts +++ b/src/vs/base/common/marshallingIds.ts @@ -27,5 +27,4 @@ export const enum MarshalledId { LanguageModelTextPart, LanguageModelPromptTsxPart, LanguageModelDataPart, - LanguageModelExtraDataPart, } diff --git a/src/vs/platform/extensions/common/extensionsApiProposals.ts b/src/vs/platform/extensions/common/extensionsApiProposals.ts index efa4401388bd..1556e0eb47ea 100644 --- a/src/vs/platform/extensions/common/extensionsApiProposals.ts +++ b/src/vs/platform/extensions/common/extensionsApiProposals.ts @@ -231,7 +231,7 @@ const _allApiProposals = { }, languageModelDataPart: { proposal: 'https://raw.githubusercontent.com/microsoft/vscode/main/src/vscode-dts/vscode.proposed.languageModelDataPart.d.ts', - version: 2 + version: 3 }, languageModelSystem: { proposal: 'https://raw.githubusercontent.com/microsoft/vscode/main/src/vscode-dts/vscode.proposed.languageModelSystem.d.ts', diff --git a/src/vs/workbench/api/common/extHost.api.impl.ts b/src/vs/workbench/api/common/extHost.api.impl.ts index 7ee511716bd7..6ec0e513d1af 100644 --- a/src/vs/workbench/api/common/extHost.api.impl.ts +++ b/src/vs/workbench/api/common/extHost.api.impl.ts @@ -1833,7 +1833,6 @@ export function createApiFactoryAndRegisterActors(accessor: ServicesAccessor): I LanguageModelToolResult: extHostTypes.LanguageModelToolResult, LanguageModelToolResult2: extHostTypes.LanguageModelToolResult2, LanguageModelDataPart: extHostTypes.LanguageModelDataPart, - LanguageModelExtraDataPart: extHostTypes.LanguageModelExtraDataPart, ExtendedLanguageModelToolResult: extHostTypes.ExtendedLanguageModelToolResult, PreparedTerminalToolInvocation: extHostTypes.PreparedTerminalToolInvocation, LanguageModelChatToolMode: extHostTypes.LanguageModelChatToolMode, diff --git a/src/vs/workbench/api/common/extHostTypeConverters.ts b/src/vs/workbench/api/common/extHostTypeConverters.ts index e6fd3913d5f2..773280678980 100644 --- a/src/vs/workbench/api/common/extHostTypeConverters.ts +++ b/src/vs/workbench/api/common/extHostTypeConverters.ts @@ -45,7 +45,7 @@ import { IChatRequestVariableEntry, isImageVariableEntry } from '../../contrib/c import { IChatAgentMarkdownContentWithVulnerability, IChatCodeCitation, IChatCommandButton, IChatConfirmation, IChatContentInlineReference, IChatContentReference, IChatExtensionsContent, IChatFollowup, IChatMarkdownContent, IChatMoveMessage, IChatProgressMessage, IChatResponseCodeblockUriPart, IChatTaskDto, IChatTaskResult, IChatTextEdit, IChatTreeData, IChatUserActionEvent, IChatWarningMessage } from '../../contrib/chat/common/chatService.js'; import { IToolData, IToolResult } from '../../contrib/chat/common/languageModelToolsService.js'; import * as chatProvider from '../../contrib/chat/common/languageModels.js'; -import { IChatResponseDataPart, IChatResponsePromptTsxPart, IChatResponseTextPart } from '../../contrib/chat/common/languageModels.js'; +import { IChatMessageDataPart, IChatResponseDataPart, IChatResponsePromptTsxPart, IChatResponseTextPart } from '../../contrib/chat/common/languageModels.js'; import { DebugTreeItemCollapsibleState, 
IDebugVisualizationTreeItem } from '../../contrib/debug/common/debug.js'; import * as notebooks from '../../contrib/notebook/common/notebookCommon.js'; import { CellEditType } from '../../contrib/notebook/common/notebookCommon.js'; @@ -2330,12 +2330,14 @@ export namespace LanguageModelChatMessage { } }); return new types.LanguageModelToolResultPart(c.toolCallId, content, c.isError); - } else if (c.type === 'image_url' || c.type === 'extra_data') { + } else if (c.type === 'image_url') { // Non-stable types return undefined; - } else { + } else if (c.type === 'tool_use') { return new types.LanguageModelToolCallPart(c.toolCallId, c.name, c.parameters); } + + return undefined; }).filter(c => c !== undefined); const role = LanguageModelChatMessageRole.to(message.role); @@ -2427,8 +2429,8 @@ export namespace LanguageModelChatMessage2 { return new types.LanguageModelToolResultPart2(c.toolCallId, content, c.isError); } else if (c.type === 'image_url') { return new types.LanguageModelDataPart(c.value.data.buffer, c.value.mimeType); - } else if (c.type === 'extra_data') { - return new types.LanguageModelExtraDataPart(c.kind, c.data); + } else if (c.type === 'data') { + return new types.LanguageModelDataPart(c.data.buffer, c.mimeType); } else { return new types.LanguageModelToolCallPart(c.toolCallId, c.name, c.parameters); } @@ -2480,15 +2482,23 @@ export namespace LanguageModelChatMessage2 { isError: c.isError }; } else if (c instanceof types.LanguageModelDataPart) { - const value: chatProvider.IChatImageURLPart = { - mimeType: c.mimeType as chatProvider.ChatImageMimeType, - data: VSBuffer.wrap(c.data), - }; + if (isImageDataPart(c)) { + const value: chatProvider.IChatImageURLPart = { + mimeType: c.mimeType as chatProvider.ChatImageMimeType, + data: VSBuffer.wrap(c.data), + }; - return { - type: 'image_url', - value: value - }; + return { + type: 'image_url', + value: value + }; + } else { + return { + type: 'data', + mimeType: c.mimeType, + data: VSBuffer.wrap(c.data), + } satisfies IChatMessageDataPart; + } } else if (c instanceof types.LanguageModelToolCallPart) { return { type: 'tool_use', @@ -2501,12 +2511,6 @@ export namespace LanguageModelChatMessage2 { type: 'text', value: c.value }; - } else if (c instanceof types.LanguageModelExtraDataPart) { - return { - type: 'extra_data', - kind: c.kind, - data: c.data - } satisfies chatProvider.IChatMessagePart; } else { if (typeof c !== 'string') { throw new Error('Unexpected chat message content type llm 2'); @@ -2527,6 +2531,19 @@ export namespace LanguageModelChatMessage2 { } } +function isImageDataPart(part: types.LanguageModelDataPart): boolean { + switch (part.mimeType) { + case types.ChatImageMimeType.PNG: + case types.ChatImageMimeType.JPEG: + case types.ChatImageMimeType.GIF: + case types.ChatImageMimeType.WEBP: + case types.ChatImageMimeType.BMP: + return true; + default: + return false; + } +} + export namespace ChatResponseMarkdownPart { export function from(part: vscode.ChatResponseMarkdownPart): Dto { return { diff --git a/src/vs/workbench/api/common/extHostTypes.ts b/src/vs/workbench/api/common/extHostTypes.ts index 1bbaf99167f2..9f1923a32ae2 100644 --- a/src/vs/workbench/api/common/extHostTypes.ts +++ b/src/vs/workbench/api/common/extHostTypes.ts @@ -5012,19 +5012,19 @@ export class LanguageModelChatMessage implements vscode.LanguageModelChatMessage export class LanguageModelChatMessage2 implements vscode.LanguageModelChatMessage2 { - static User(content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | 
LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[], name?: string): LanguageModelChatMessage2 { + static User(content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[], name?: string): LanguageModelChatMessage2 { return new LanguageModelChatMessage2(LanguageModelChatMessageRole.User, content, name); } - static Assistant(content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[], name?: string): LanguageModelChatMessage2 { + static Assistant(content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[], name?: string): LanguageModelChatMessage2 { return new LanguageModelChatMessage2(LanguageModelChatMessageRole.Assistant, content, name); } role: vscode.LanguageModelChatMessageRole; - private _content: (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[] = []; + private _content: (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[] = []; - set content(value: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[]) { + set content(value: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[]) { if (typeof value === 'string') { // we changed this and still support setting content with a string property. this keep the API runtime stable // despite the breaking change in the type definition. @@ -5034,7 +5034,7 @@ export class LanguageModelChatMessage2 implements vscode.LanguageModelChatMessag } } - get content(): (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[] { + get content(): (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[] { return this._content; } @@ -5050,7 +5050,7 @@ export class LanguageModelChatMessage2 implements vscode.LanguageModelChatMessag } } - get content2(): (string | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[] | undefined { + get content2(): (string | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[] | undefined { return this.content.map(part => { if (part instanceof LanguageModelTextPart) { return part.value; @@ -5061,7 +5061,7 @@ export class LanguageModelChatMessage2 implements vscode.LanguageModelChatMessag name: string | undefined; - constructor(role: vscode.LanguageModelChatMessageRole, content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart)[], name?: string) { + constructor(role: vscode.LanguageModelChatMessageRole, content: string | (LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart)[], name?: string) { this.role = role; this.content = content; this.name = name; @@ -5110,13 +5110,13 @@ export class LanguageModelDataPart implements vscode.LanguageModelDataPart { return new LanguageModelDataPart(data, mimeType as string); } - static json(value: object): vscode.LanguageModelDataPart { + static json(value: object, mime: string = 
'text/x-json'): vscode.LanguageModelDataPart { const rawStr = JSON.stringify(value, undefined, '\t'); - return new LanguageModelDataPart(VSBuffer.fromString(rawStr).buffer, 'json'); + return new LanguageModelDataPart(VSBuffer.fromString(rawStr).buffer, mime); } - static text(value: string): vscode.LanguageModelDataPart { - return new LanguageModelDataPart(VSBuffer.fromString(value).buffer, 'text/plain'); + static text(value: string, mime: string = Mimes.text): vscode.LanguageModelDataPart { + return new LanguageModelDataPart(VSBuffer.fromString(value).buffer, mime); } toJSON() { @@ -5136,24 +5136,6 @@ export enum ChatImageMimeType { BMP = 'image/bmp', } -export class LanguageModelExtraDataPart implements vscode.LanguageModelExtraDataPart { - kind: string; - data: any; - - constructor(kind: string, data: any) { - this.kind = kind; - this.data = data; - } - - toJSON() { - return { - $mid: MarshalledId.LanguageModelExtraDataPart, - kind: this.kind, - data: this.data, - }; - } -} - export class LanguageModelPromptTsxPart { value: unknown; diff --git a/src/vs/workbench/contrib/chat/common/languageModels.ts b/src/vs/workbench/contrib/chat/common/languageModels.ts index b6ae22e7f970..1dd63f7f5720 100644 --- a/src/vs/workbench/contrib/chat/common/languageModels.ts +++ b/src/vs/workbench/contrib/chat/common/languageModels.ts @@ -40,10 +40,10 @@ export interface IChatMessageImagePart { value: IChatImageURLPart; } -export interface IChatMessageExtraDataPart { - type: 'extra_data'; - kind: string; - data: any; +export interface IChatMessageDataPart { + type: 'data'; + mimeType: string; + data: VSBuffer; } export interface IChatImageURLPart { @@ -85,7 +85,7 @@ export interface IChatMessageToolResultPart { isError?: boolean; } -export type IChatMessagePart = IChatMessageTextPart | IChatMessageToolResultPart | IChatResponseToolUsePart | IChatMessageImagePart | IChatMessageExtraDataPart; +export type IChatMessagePart = IChatMessageTextPart | IChatMessageToolResultPart | IChatResponseToolUsePart | IChatMessageImagePart | IChatMessageDataPart; export interface IChatMessage { readonly name?: string | undefined; diff --git a/src/vscode-dts/vscode.proposed.languageModelDataPart.d.ts b/src/vscode-dts/vscode.proposed.languageModelDataPart.d.ts index f365ede02e6f..902ed64ec97d 100644 --- a/src/vscode-dts/vscode.proposed.languageModelDataPart.d.ts +++ b/src/vscode-dts/vscode.proposed.languageModelDataPart.d.ts @@ -3,7 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ -// version: 2 +// version: 3 declare module 'vscode' { @@ -23,7 +23,7 @@ * @param content The content of the message. * @param name The optional name of a user for the message. */ - static User(content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart>, name?: string): LanguageModelChatMessage2; + static User(content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart>, name?: string): LanguageModelChatMessage2; /** * Utility to create a new assistant message. * * @param content The content of the message. * @param name The optional name of a user for the message. */ - static Assistant(content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart>, name?: string): LanguageModelChatMessage2; + static Assistant(content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart>, name?: string): LanguageModelChatMessage2; /** * The role of this message. */ @@ -42,7 +42,7 @@ * A string or heterogeneous array of things that a message can contain as content. 
Some parts may be message-type * specific for some models. */ - content: Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart>; + content: Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart>; /** * The optional name of a user for this message. @@ -56,7 +56,7 @@ * @param content The content of the message. * @param name The optional name of a user for the message. */ - constructor(role: LanguageModelChatMessageRole, content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart | LanguageModelExtraDataPart>, name?: string); + constructor(role: LanguageModelChatMessageRole, content: string | Array<LanguageModelTextPart | LanguageModelToolResultPart2 | LanguageModelToolCallPart | LanguageModelDataPart>, name?: string); } /** @@ -70,9 +70,9 @@ */ static image(data: Uint8Array, mimeType: ChatImageMimeType): LanguageModelDataPart; - static json(value: object): LanguageModelDataPart; + static json(value: any, mime?: string): LanguageModelDataPart; - static text(value: string): LanguageModelDataPart; + static text(value: string, mime?: string): LanguageModelDataPart; /** * The mime type which determines how the data property is interpreted. @@ -102,31 +102,6 @@ BMP = 'image/bmp', } - /** - * Tagging onto this proposal, because otherwise managing two different extensions of LanguageModelChatMessage could be confusing. - * A language model response part containing arbitrary model-specific data, returned from a {@link LanguageModelChatResponse}. - * TODO@API naming, looking at LanguageModelChatRequestOptions.modelOptions, but LanguageModelModelData is not very good. - * LanguageModelOpaqueData from prompt-tsx? - */ - export class LanguageModelExtraDataPart { - /** - * The type of data. The allowed values and data types here are model-specific. - */ - kind: string; - - /** - * Extra model-specific data. - */ - data: any; - - /** - * Construct an extra data part with the given content. - * @param value The image content of the part. - */ - constructor(kind: string, data: any); - } - - /** * The result of a tool call. This is the counterpart of a {@link LanguageModelToolCallPart tool call} and * it can only be included in the content of a User message