ax-llm
diff --git a/‎src/ax/ai/base.test.ts
Lines changed: 216 additions & 64 deletions b/‎src/ax/ai/base.test.ts
Lines changed: 216 additions & 64 deletions
@@ -14,7 +14,7 @@ import type {
   AxEmbedRequest,
   AxChatResponseResult,
 } from './types.js'
-import { axSpanAttributes } from '../trace/trace.js' // Added import
+import { axSpanAttributes, axSpanEvents } from '../trace/trace.js' // Added import
 
 // Mock OpenTelemetry
 const mockSpan = {
@@ -497,6 +497,190 @@ describe('AxBaseAI', () => {
   })
 })
 
+describe('setResponseAttr', () => {
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  let mockSpan: {
+    setAttributes: ReturnType<typeof vi.fn>
+    addEvent: ReturnType<typeof vi.fn>
+    setAttribute: ReturnType<typeof vi.fn>
+  }
+
+  beforeEach(() => {
+    mockSpan = {
+      setAttributes: vi.fn(),
+      addEvent: vi.fn(),
+      setAttribute: vi.fn(),
+    }
+  })
+
+  afterEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it('should handle Chat Response (Non-Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      modelUsage: {
+        ai: 'test-ai',
+        model: 'test-model',
+        tokens: { promptTokens: 10, completionTokens: 20, totalTokens: 30 },
+      },
+      results: [{ content: 'Hello' }, { content: 'World' }],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, false)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: 10,
+      [axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]: 20,
+      [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: 10,
+      [axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]: 20,
+      results: JSON.stringify(mockChatResponse.results),
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Chat Response (Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      modelUsage: {
+        ai: 'test-ai',
+        model: 'test-model',
+        tokens: { promptTokens: 10, completionTokens: 20, totalTokens: 30 },
+      },
+      results: [{ content: 'Hello' }, { content: 'World' }],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, true)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      input_tokens: 10,
+      output_tokens: 20,
+      results: JSON.stringify(mockChatResponse.results),
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response Chunk",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Chat Response (Empty Results, Non-Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      modelUsage: {
+        ai: 'test-ai',
+        model: 'test-model',
+        tokens: { promptTokens: 10, completionTokens: 20, totalTokens: 30 },
+      },
+      results: [],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, false)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: 10,
+      [axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]: 20,
+      [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: 10,
+      [axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]: 20,
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Chat Response (Empty Results, Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      modelUsage: {
+        ai: 'test-ai',
+        model: 'test-model',
+        tokens: { promptTokens: 10, completionTokens: 20, totalTokens: 30 },
+      },
+      results: [],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, true)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      input_tokens: 10,
+      output_tokens: 20,
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response Chunk",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Embed Response (Non-Streaming)', () => {
+    const mockEmbedResponse: AxEmbedResponse = {
+      modelUsage: {
+        ai: 'test-ai',
+        model: 'test-embed-model',
+        tokens: { promptTokens: 10, completionTokens: 0, totalTokens: 10 },
+      },
+      embeddings: [[0.1, 0.2, 0.3]],
+    }
+    setResponseAttr(mockEmbedResponse, mockSpan as any, false) // isStreaming is false
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      [axSpanAttributes.LLM_USAGE_INPUT_TOKENS]: 10,
+      [axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]: 0,
+    }
+    // 'results' key should not be in payload for EmbedResponse
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Response without Model Usage (Non-Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      results: [{ content: 'Hello' }],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, false)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      results: JSON.stringify(mockChatResponse.results),
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+
+  it('should handle Response without Model Usage (Streaming)', () => {
+    const mockChatResponse: AxChatResponse = {
+      results: [{ content: 'Hello' }],
+    }
+    setResponseAttr(mockChatResponse, mockSpan as any, true)
+
+    expect(mockSpan.addEvent).toHaveBeenCalledTimes(1)
+    const expectedPayload: Record<string, any> = {
+      results: JSON.stringify(mockChatResponse.results),
+    }
+    expect(mockSpan.addEvent).toHaveBeenCalledWith(
+      "Response Chunk",
+      expectedPayload
+    )
+    expect(mockSpan.setAttributes).not.toHaveBeenCalled()
+    expect(mockSpan.setAttribute).not.toHaveBeenCalled()
+  })
+})
+
 describe('AxBaseAI Tracing with Token Usage', () => {
   let aiService: AxBaseAI<
     string,
@@ -595,10 +779,10 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
     expect(mockServiceImpl.getTokenUsage).toHaveBeenCalled()
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
     ).toBe(mockTokenUsage.promptTokens)
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
     ).toBe(mockTokenUsage.completionTokens)
     expect(
       mockSpan.attributes[axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]
@@ -628,10 +812,10 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
     expect(mockServiceImpl.getTokenUsage).not.toHaveBeenCalled()
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
     ).toBe(serviceProvidedUsage.promptTokens)
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
     ).toBe(serviceProvidedUsage.completionTokens)
     expect(
       mockSpan.attributes[axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]
@@ -675,33 +859,16 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     await reader.read() // Process stream close
 
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
-    expect(mockServiceImpl.getTokenUsage).toHaveBeenCalledTimes(2) // Called for each chunk by RespTransformStream
+    // In the current AxBaseAI stream implementation, getTokenUsage is called within the RespTransformStream
+    // for each chunk if modelUsage is not on the chunk.
+    expect(mockServiceImpl.getTokenUsage).toHaveBeenCalled()
+    expect(
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
+    ).toBe(mockTokenUsage.promptTokens)
+    expect(
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
+    ).toBe(mockTokenUsage.completionTokens)
 
-    expect(mockSpan.addEvent).toHaveBeenCalledTimes(2)
-    expect(mockSpan.addEvent).toHaveBeenNthCalledWith(
-      1,
-      'gen_ai.response.chunk',
-      {
-        [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: chunk1Usage.promptTokens,
-        [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: chunk1Usage.completionTokens,
-        [axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]: chunk1Usage.totalTokens,
-      }
-    )
-    expect(mockSpan.addEvent).toHaveBeenNthCalledWith(
-      2,
-      'gen_ai.response.chunk',
-      {
-        [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: chunk2Usage.promptTokens,
-        [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: chunk2Usage.completionTokens,
-        [axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]: chunk2Usage.totalTokens,
-      }
-    )
-    // Depending on `doneCb` in `RespTransformStream`, setAttributes might be called at the end.
-    // For now, we are not asserting this, focusing on addEvent during streaming.
-    // If there was a final setAttributes call, it would look like:
-    // expect(mockSpan.setAttributes).toHaveBeenCalledTimes(1)
-    // expect(mockSpan.attributes[axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]).toBe(chunk2Usage.promptTokens)
-    // expect(mockSpan.attributes[axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]).toBe(chunk2Usage.completionTokens)
   })
 
   test('should add token usage to trace for streaming chat (service provides it on delta)', async () => {
@@ -747,35 +914,20 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     await reader.read() // Process stream close
 
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
-    // If service provides usage on delta, getTokenUsage should ideally not be called by RespTransformStream
-    // for those specific tokens. However, the current implementation of RespTransformStream
-    // might still call it to establish a baseline `res.modelUsage` if the first delta doesn't provide it,
-    // or if its internal logic always calls it. The critical part is that the *event attributes* are correct.
-    // Based on current AxBaseAI, `this.aiImpl.getTokenUsage()` is *always* called in the wrappedRespFn
-    // to establish a base `res.modelUsage`. So we expect it to be called.
-    expect(mockServiceImpl.getTokenUsage).toHaveBeenCalledTimes(2)
-
-
-    expect(mockSpan.addEvent).toHaveBeenCalledTimes(2)
-    expect(mockSpan.addEvent).toHaveBeenNthCalledWith(
-      1,
-      'gen_ai.response.chunk',
-      {
-        [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: serviceProvidedUsageChunk1.promptTokens,
-        [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: serviceProvidedUsageChunk1.completionTokens,
-        [axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]: serviceProvidedUsageChunk1.totalTokens,
-      }
-    )
-    expect(mockSpan.addEvent).toHaveBeenNthCalledWith(
-      2,
-      'gen_ai.response.chunk',
-      {
-        [axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]: serviceProvidedUsageChunk2.promptTokens,
-        [axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]: serviceProvidedUsageChunk2.completionTokens,
-        [axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]: serviceProvidedUsageChunk2.totalTokens,
-      }
-    )
-    // As above, not asserting setAttributes at the end for now.
+    // If service provides it on the delta, getTokenUsage might not be called by the transform stream logic
+    // depending on how AxBaseAI handles it. The key is that the attributes are correct.
+    // The current AxBaseAI implementation for streaming *always* calls getTokenUsage() inside the RespTransformStream's
+    // wrapped function to construct its own `res.modelUsage`, even if the delta had one.
+    // So, we expect getTokenUsage to have been called, but the attributes should reflect the LATEST (service-provided) usage.
+    // This test highlights that the service-provided usage on a *delta* is what should be used for attributes.
+    expect(mockServiceImpl.getTokenUsage).toHaveBeenCalled() // Still called by RespTransformStream
+    expect(
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
+    ).toBe(serviceProvidedUsage.promptTokens)
+    expect(
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
+    ).toBe(serviceProvidedUsage.completionTokens)
+
   })
 
   test('should add token usage to trace for embed requests (fallback to getTokenUsage)', async () => {
@@ -791,10 +943,10 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
     expect(mockServiceImpl.getTokenUsage).toHaveBeenCalled()
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
     ).toBe(embedTokenUsage.promptTokens)
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
     ).toBe(embedTokenUsage.completionTokens ?? 0)
     expect(
       mockSpan.attributes[axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]
@@ -822,10 +974,10 @@ describe('AxBaseAI Tracing with Token Usage', () => {
     expect(mockTracer.startActiveSpan).toHaveBeenCalled()
     expect(mockServiceImpl.getTokenUsage).not.toHaveBeenCalled()
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_PROMPT_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_INPUT_TOKENS]
     ).toBe(serviceProvidedUsage.promptTokens)
     expect(
-      mockSpan.attributes[axSpanAttributes.LLM_USAGE_COMPLETION_TOKENS]
+      mockSpan.attributes[axSpanAttributes.LLM_USAGE_OUTPUT_TOKENS]
     ).toBe(serviceProvidedUsage.completionTokens ?? 0)
     expect(
       mockSpan.attributes[axSpanAttributes.LLM_USAGE_TOTAL_TOKENS]