Skip to content

Commit dd885e2

Browse files
committed
fix-usage-chunk
1 parent 40fa3d6 commit dd885e2

File tree

2 files changed

+39
-18
lines changed

2 files changed

+39
-18
lines changed

internal/extproc/translator/openai_gcpvertexai.go

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -175,16 +175,6 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
175175
// Convert GCP chunk to OpenAI chunk.
176176
openAIChunk := o.convertGCPChunkToOpenAI(chunk)
177177

178-
// Extract token usage if present in this chunk (typically in the last chunk).
179-
if chunk.UsageMetadata != nil {
180-
tokenUsage = LLMTokenUsage{
181-
InputTokens: uint32(chunk.UsageMetadata.PromptTokenCount), //nolint:gosec
182-
OutputTokens: uint32(chunk.UsageMetadata.CandidatesTokenCount), //nolint:gosec
183-
TotalTokens: uint32(chunk.UsageMetadata.TotalTokenCount), //nolint:gosec
184-
CachedInputTokens: uint32(chunk.UsageMetadata.CachedContentTokenCount), //nolint:gosec
185-
}
186-
}
187-
188178
// Serialize to SSE format as expected by OpenAI API.
189179
var chunkBytes []byte
190180
chunkBytes, err = json.Marshal(openAIChunk)
@@ -198,6 +188,40 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) handleStreamingResponse(
198188
if span != nil {
199189
span.RecordResponseChunk(openAIChunk)
200190
}
191+
192+
// Extract token usage only in the last chunk.
193+
if chunk.UsageMetadata != nil && chunk.UsageMetadata.PromptTokenCount > 0 {
194+
// Convert usage to pointer if available.
195+
usage := ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
196+
197+
usageChunk := openai.ChatCompletionResponseChunk{
198+
Object: "chat.completion.chunk",
199+
Choices: []openai.ChatCompletionResponseChunkChoice{},
200+
// usage is nil for all chunks other than the last chunk
201+
Usage: usage,
202+
}
203+
204+
// Serialize to SSE format as expected by OpenAI API.
205+
var chunkBytes []byte
206+
chunkBytes, err = json.Marshal(usageChunk)
207+
if err != nil {
208+
return nil, nil, LLMTokenUsage{}, "", fmt.Errorf("error marshaling OpenAI chunk: %w", err)
209+
}
210+
sseChunkBuf.WriteString("data: ")
211+
sseChunkBuf.Write(chunkBytes)
212+
sseChunkBuf.WriteString("\n\n")
213+
214+
if span != nil {
215+
span.RecordResponseChunk(openAIChunk)
216+
}
217+
218+
tokenUsage = LLMTokenUsage{
219+
InputTokens: uint32(chunk.UsageMetadata.PromptTokenCount), //nolint:gosec
220+
OutputTokens: uint32(chunk.UsageMetadata.CandidatesTokenCount), //nolint:gosec
221+
TotalTokens: uint32(chunk.UsageMetadata.TotalTokenCount), //nolint:gosec
222+
CachedInputTokens: uint32(chunk.UsageMetadata.CachedContentTokenCount), //nolint:gosec
223+
}
224+
}
201225
}
202226
mut := &extprocv3.BodyMutation_Body{
203227
Body: sseChunkBuf.Bytes(),
@@ -251,16 +275,11 @@ func (o *openAIToGCPVertexAITranslatorV1ChatCompletion) convertGCPChunkToOpenAI(
251275
choices = []openai.ChatCompletionResponseChunkChoice{}
252276
}
253277

254-
// Convert usage to pointer if available.
255-
var usage *openai.Usage
256-
if chunk.UsageMetadata != nil {
257-
usage = ptr.To(geminiUsageToOpenAIUsage(chunk.UsageMetadata))
258-
}
259-
260278
return &openai.ChatCompletionResponseChunk{
261279
Object: "chat.completion.chunk",
262280
Choices: choices,
263-
Usage: usage,
281+
// usage is nil for all chunks other than the last chunk
282+
Usage: nil,
264283
}
265284
}
266285

internal/extproc/translator/openai_gcpvertexai_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1054,7 +1054,9 @@ func TestOpenAIToGCPVertexAITranslatorV1ChatCompletion_ResponseBody(t *testing.T
10541054
wantHeaderMut: nil,
10551055
wantBodyMut: &extprocv3.BodyMutation{
10561056
Mutation: &extprocv3.BodyMutation_Body{
1057-
Body: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
1057+
Body: []byte(`data: {"choices":[{"index":0,"delta":{"content":"Hello","role":"assistant"}}],"object":"chat.completion.chunk"}
1058+
1059+
data: {"object":"chat.completion.chunk","usage":{"prompt_tokens":5,"completion_tokens":3,"total_tokens":8,"completion_tokens_details":{},"prompt_tokens_details":{}}}
10581060
10591061
data: [DONE]
10601062
`),

0 commit comments

Comments (0)