@@ -78,17 +78,63 @@ type ChatMessageImageURL struct {
    Detail ImageURLDetail `json:"detail,omitempty"`
}

+ type AudioVoice string
+
+ const (
+     AudioVoiceAlloy   AudioVoice = "alloy"
+     AudioVoiceAsh     AudioVoice = "ash"
+     AudioVoiceBallad  AudioVoice = "ballad"
+     AudioVoiceCoral   AudioVoice = "coral"
+     AudioVoiceEcho    AudioVoice = "echo"
+     AudioVoiceSage    AudioVoice = "sage"
+     AudioVoiceShimmer AudioVoice = "shimmer"
+     AudioVoiceVerse   AudioVoice = "verse"
+ )
+
+ type AudioFormat string
+
+ const (
+     AudioFormatWAV   AudioFormat = "wav"
+     AudioFormatMP3   AudioFormat = "mp3"
+     AudioFormatFLAC  AudioFormat = "flac"
+     AudioFormatOPUS  AudioFormat = "opus"
+     AudioFormatPCM16 AudioFormat = "pcm16"
+ )
+
+ type ChatMessageAudio struct {
+     // Base64 encoded audio data.
+     Data string `json:"data,omitempty"`
+     // The format of the encoded audio data. Currently supports "wav" and "mp3".
+     Format AudioFormat `json:"format,omitempty"`
+ }
+
+ type Modality string
+
+ const (
+     ModalityAudio Modality = "audio"
+     ModalityText  Modality = "text"
+ )
+
+ type AudioOutput struct {
+     // The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, sage, shimmer, and verse.
+     Voice AudioVoice `json:"voice"`
+     // Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
+     Format AudioFormat `json:"format"`
+ }
+
type ChatMessagePartType string

const (
-     ChatMessagePartTypeText     ChatMessagePartType = "text"
-     ChatMessagePartTypeImageURL ChatMessagePartType = "image_url"
+     ChatMessagePartTypeText       ChatMessagePartType = "text"
+     ChatMessagePartTypeImageURL   ChatMessagePartType = "image_url"
+     ChatMessagePartTypeInputAudio ChatMessagePartType = "input_audio"
)

type ChatMessagePart struct {
-     Type     ChatMessagePartType `json:"type,omitempty"`
-     Text     string `json:"text,omitempty"`
-     ImageURL *ChatMessageImageURL `json:"image_url,omitempty"`
+     Type       ChatMessagePartType `json:"type,omitempty"`
+     Text       string `json:"text,omitempty"`
+     ImageURL   *ChatMessageImageURL `json:"image_url,omitempty"`
+     InputAudio *ChatMessageAudio `json:"input_audio,omitempty"`
}

type ChatCompletionMessage struct {
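
Usage note (not part of the diff): a minimal sketch of how a caller might attach audio input using the new input_audio part type. It assumes the package's existing ChatCompletionMessage with its Role and MultiContent fields, plus the standard library's encoding/base64; the helper name is hypothetical.

// Illustrative only: wrap raw WAV bytes in a user message that uses the new
// input_audio content part. Requires `import "encoding/base64"`.
func newAudioUserMessage(wavBytes []byte) ChatCompletionMessage {
    return ChatCompletionMessage{
        Role: "user", // the package's existing role constant could be used here instead
        MultiContent: []ChatMessagePart{
            {
                Type: ChatMessagePartTypeInputAudio,
                InputAudio: &ChatMessageAudio{
                    Data:   base64.StdEncoding.EncodeToString(wavBytes),
                    Format: AudioFormatWAV,
                },
            },
        },
    }
}
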
@@ -110,72 +156,74 @@ type ChatCompletionMessage struct {
    // For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.
    ToolCallID string `json:"tool_call_id,omitempty"`
+
+     // If the audio output modality is requested, this object contains data about the audio response from the model.
+     Audio *ChatCompletionAudio `json:"audio,omitempty"`
+ }
+
+ type chatCompletionMessageMultiContent struct {
+     Role         string `json:"role"`
+     Content      string `json:"-"`
+     Refusal      string `json:"refusal,omitempty"`
+     MultiContent []ChatMessagePart `json:"content,omitempty"`
+     Name         string `json:"name,omitempty"`
+     FunctionCall *FunctionCall `json:"function_call,omitempty"`
+     ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
+     ToolCallID   string `json:"tool_call_id,omitempty"`
+     Audio        *ChatCompletionAudio `json:"audio,omitempty"`
+ }
+
+ type chatCompletionMessageSingleContent struct {
+     Role         string `json:"role"`
+     Content      string `json:"content"`
+     Refusal      string `json:"refusal,omitempty"`
+     MultiContent []ChatMessagePart `json:"-"`
+     Name         string `json:"name,omitempty"`
+     FunctionCall *FunctionCall `json:"function_call,omitempty"`
+     ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
+     ToolCallID   string `json:"tool_call_id,omitempty"`
+     Audio        *ChatCompletionAudio `json:"audio,omitempty"`
}

func (m ChatCompletionMessage) MarshalJSON() ([]byte, error) {
    if m.Content != "" && m.MultiContent != nil {
        return nil, ErrContentFieldsMisused
    }
    if len(m.MultiContent) > 0 {
-         msg := struct {
-             Role         string `json:"role"`
-             Content      string `json:"-"`
-             Refusal      string `json:"refusal,omitempty"`
-             MultiContent []ChatMessagePart `json:"content,omitempty"`
-             Name         string `json:"name,omitempty"`
-             FunctionCall *FunctionCall `json:"function_call,omitempty"`
-             ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
-             ToolCallID   string `json:"tool_call_id,omitempty"`
-         }(m)
+         msg := chatCompletionMessageMultiContent(m)
        return json.Marshal(msg)
    }

-     msg := struct {
-         Role         string `json:"role"`
-         Content      string `json:"content"`
-         Refusal      string `json:"refusal,omitempty"`
-         MultiContent []ChatMessagePart `json:"-"`
-         Name         string `json:"name,omitempty"`
-         FunctionCall *FunctionCall `json:"function_call,omitempty"`
-         ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
-         ToolCallID   string `json:"tool_call_id,omitempty"`
-     }(m)
+     msg := chatCompletionMessageSingleContent(m)
    return json.Marshal(msg)
}

func (m *ChatCompletionMessage) UnmarshalJSON(bs []byte) error {
-     msg := struct {
-         Role         string `json:"role"`
-         Content      string `json:"content"`
-         Refusal      string `json:"refusal,omitempty"`
-         MultiContent []ChatMessagePart
-         Name         string `json:"name,omitempty"`
-         FunctionCall *FunctionCall `json:"function_call,omitempty"`
-         ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
-         ToolCallID   string `json:"tool_call_id,omitempty"`
-     }{}
+     msg := chatCompletionMessageSingleContent{}

    if err := json.Unmarshal(bs, &msg); err == nil {
        *m = ChatCompletionMessage(msg)
        return nil
    }
-     multiMsg := struct {
-         Role         string `json:"role"`
-         Content      string
-         Refusal      string `json:"refusal,omitempty"`
-         MultiContent []ChatMessagePart `json:"content"`
-         Name         string `json:"name,omitempty"`
-         FunctionCall *FunctionCall `json:"function_call,omitempty"`
-         ToolCalls    []ToolCall `json:"tool_calls,omitempty"`
-         ToolCallID   string `json:"tool_call_id,omitempty"`
-     }{}
+     multiMsg := chatCompletionMessageMultiContent{}
    if err := json.Unmarshal(bs, &multiMsg); err != nil {
        return err
    }
    *m = ChatCompletionMessage(multiMsg)
    return nil
}

+ type ChatCompletionAudio struct {
+     // Unique identifier for this audio response.
+     ID string `json:"id"`
+     // The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
+     ExpiresAt int64 `json:"expires_at"`
+     // Base64 encoded audio bytes generated by the model, in the format specified in the request.
+     Data string `json:"data"`
+     // Transcript of the audio generated by the model.
+     Transcript string `json:"transcript"`
+ }
+
type ToolCall struct {
    // Index is not nil only in chat completion chunk object
    Index *int `json:"index,omitempty"`
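
Usage note (not part of the diff): the exported ChatCompletionAudio added above is what callers read back from an assistant message. A hedged sketch of persisting it, assuming the response was produced with the audio output modality; the helper name is hypothetical and errors, encoding/base64, and os are standard library.

// Illustrative only: decode and persist the audio attached to an assistant message.
func saveResponseAudio(msg ChatCompletionMessage, path string) error {
    if msg.Audio == nil {
        return errors.New("message carries no audio part")
    }
    raw, err := base64.StdEncoding.DecodeString(msg.Audio.Data)
    if err != nil {
        return err
    }
    // msg.Audio.Transcript and msg.Audio.ExpiresAt (Unix seconds) are also
    // available if the caller needs the text or the server-side expiry.
    return os.WriteFile(path, raw, 0o644)
}
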
@@ -260,6 +308,11 @@ type ChatCompletionRequest struct {
    Store bool `json:"store,omitempty"`
    // Metadata to store with the completion.
    Metadata map[string]string `json:"metadata,omitempty"`
+     // Output types that you would like the model to generate for this request. Most models are capable of generating text, which is the default: ["text"]
+     // The gpt-4o-audio-preview model can also be used to generate audio. To request that this model generate both text and audio responses, you can use: ["text", "audio"]
+     Modalities []Modality `json:"modalities,omitempty"`
+     // Parameters for audio output. Required when audio output is requested with modalities: ["audio"]
+     Audio *AudioOutput `json:"audio,omitempty"`
}

type StreamOptions struct {
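
Usage note (not part of the diff): a hedged sketch combining the new Modalities and Audio request fields. Model and Messages are assumed to be the request's existing fields; the model name is taken from the comment above and may differ for your account.

// Illustrative only: build a request that asks for both text and audio output.
func newAudioRequest() ChatCompletionRequest {
    return ChatCompletionRequest{
        Model:      "gpt-4o-audio-preview",
        Modalities: []Modality{ModalityText, ModalityAudio},
        Audio: &AudioOutput{
            Voice:  AudioVoiceAlloy,
            Format: AudioFormatMP3,
        },
        Messages: []ChatCompletionMessage{
            {Role: "user", Content: "Say hello in one short sentence."},
        },
    }
}
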