Skip to content

Commit 84f6da5

Browse files
committed
GPT-5 models handling + new verbosity parameter
1 parent bb1d055 commit 84f6da5

File tree

8 files changed

+141
-42
lines changed

8 files changed

+141
-42
lines changed

openai-client/src/main/scala/io/cequence/openaiscala/service/impl/EndPoint.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ object Param {
118118
case object thread extends Param
119119
case object store extends Param
120120
case object reasoning_effort extends Param
121+
case object verbosity extends Param
121122
case object service_tier extends Param
122123
case object web_search_options extends Param
123124
case object include extends Param

openai-client/src/main/scala/io/cequence/openaiscala/service/impl/OpenAIChatCompletionServiceImpl.scala

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,16 @@ trait ChatCompletionBodyMaker {
7575
ModelId.o4_mini_2025_04_16
7676
)
7777

78+
private val gpt5Models = Set(
79+
ModelId.gpt_5,
80+
ModelId.gpt_5_2025_08_07,
81+
ModelId.gpt_5_mini,
82+
ModelId.gpt_5_mini_2025_08_07,
83+
ModelId.gpt_5_nano,
84+
ModelId.gpt_5_nano_2025_08_07,
85+
ModelId.gpt_5_chat_latest
86+
)
87+
7888
protected def createBodyParamsForChatCompletion(
7989
messagesAux: Seq[BaseMessage],
8090
settings: CreateChatCompletionSettings,
@@ -91,12 +101,14 @@ trait ChatCompletionBodyMaker {
91101

92102
val messageJsons = messagesFinal.map(Json.toJson(_)(messageWrites))
93103

94-
// regular O models need some special treatment... revisit this later
104+
// revisit this later
95105
val settingsFinal =
96106
if (o1PreviewModels.contains(settings.model))
97107
ChatCompletionSettingsConversions.o1Preview(settings)
98108
else if (regularOModels.contains(settings.model))
99109
ChatCompletionSettingsConversions.o(settings)
110+
else if (gpt5Models.contains(settings.model))
111+
ChatCompletionSettingsConversions.gpt5(settings)
100112
else
101113
settings
102114

@@ -136,6 +148,7 @@ trait ChatCompletionBodyMaker {
136148
Param.parallel_tool_calls -> settingsFinal.parallel_tool_calls,
137149
Param.store -> settingsFinal.store,
138150
Param.reasoning_effort -> settingsFinal.reasoning_effort.map(_.toString()),
151+
Param.verbosity -> settingsFinal.verbosity.map(_.toString()),
139152
Param.service_tier -> settingsFinal.service_tier.map(_.toString()),
140153
Param.metadata -> (if (settingsFinal.metadata.nonEmpty) Some(settingsFinal.metadata)
141154
else None),

openai-core/src/main/scala/io/cequence/openaiscala/JsonFormats.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import play.api.libs.json.JsonNaming.SnakeCase
3636
import play.api.libs.json.{Format, JsValue, Json, _}
3737

3838
import java.{util => ju}
39+
import io.cequence.openaiscala.domain.settings.Verbosity
3940

4041
object JsonFormats {
4142
private implicit lazy val dateFormat: Format[ju.Date] = JsonUtil.SecDateFormat
@@ -367,6 +368,12 @@ object JsonFormats {
367368
ReasoningEffort.high
368369
)
369370

371+
implicit val verbosityFormat: Format[Verbosity] = enumFormat[Verbosity](
372+
Verbosity.low,
373+
Verbosity.medium,
374+
Verbosity.high
375+
)
376+
370377
implicit val serviceTierFormat: Format[ServiceTier] = enumFormat[ServiceTier](
371378
ServiceTier.auto,
372379
ServiceTier.default

openai-core/src/main/scala/io/cequence/openaiscala/OpenAIScalaClientException.scala

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ object Retryable {
99
): Option[OpenAIScalaClientException] = Some(t).filter(apply)
1010

1111
def apply(t: OpenAIScalaClientException): Boolean = t match {
12-
// we retry on these
13-
case _: OpenAIScalaClientTimeoutException => true
14-
case _: OpenAIScalaRateLimitException => true
15-
case _: OpenAIScalaServerErrorException => true
16-
case _: OpenAIScalaEngineOverloadedException => true
12+
// we don't retry on these
13+
case _: OpenAIScalaClientUnknownHostException => false
14+
case _: OpenAIScalaTokenCountExceededException => false
15+
case _: OpenAIScalaUnauthorizedException => false
1716

18-
// otherwise don't retry
19-
case _ => false
17+
// we retry on these
18+
case _: OpenAIScalaClientTimeoutException => true
19+
case _: OpenAIScalaRateLimitException => true
20+
case _: OpenAIScalaServerErrorException => true
21+
case _: OpenAIScalaEngineOverloadedException => true
22+
// generic case
23+
case _: OpenAIScalaClientException => true
2024
}
2125
}
2226

openai-core/src/main/scala/io/cequence/openaiscala/domain/settings/CreateChatCompletionSettings.scala

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,12 @@ case class CreateChatCompletionSettings(
8686
// Supported by o1 models only
8787
reasoning_effort: Option[ReasoningEffort] = None,
8888

89+
// Specifies the verbosity of the model's response. Only supported by gpt-5 models.
90+
// Verbosity determines how many output tokens are generated. Lowering the number of tokens reduces overall latency. While the model's reasoning approach stays mostly the same, the model finds ways to answer more concisely—which can either improve or diminish answer quality, depending on your use case. Here are some scenarios for both ends of the verbosity spectrum:
91+
// High verbosity: Use when you need the model to provide thorough explanations of documents or perform extensive code refactoring.
92+
// Low verbosity: Best for situations where you want concise answers or simple code generation, such as SQL queries.
93+
verbosity: Option[Verbosity] = None,
94+
8995
// Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:
9096
// If set to 'auto', and the Project is Scale tier enabled, the system will utilize scale tier credits until they are exhausted.
9197
// If set to 'auto', and the Project is not Scale tier enabled, the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.
@@ -141,6 +147,14 @@ object ReasoningEffort {
141147
case object high extends ReasoningEffort
142148
}
143149

150+
sealed trait Verbosity extends EnumValue
151+
152+
object Verbosity {
153+
case object low extends Verbosity
154+
case object medium extends Verbosity
155+
case object high extends Verbosity
156+
}
157+
144158
sealed trait ServiceTier extends EnumValue
145159

146160
object ServiceTier {

openai-core/src/main/scala/io/cequence/openaiscala/service/OpenAIChatCompletionExtra.scala

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import play.api.libs.json.{Format, JsValue, Json}
2222

2323
import scala.concurrent.{ExecutionContext, Future}
2424
import com.fasterxml.jackson.core.JsonProcessingException
25+
import io.cequence.openaiscala.OpenAIScalaClientException
2526

2627
object OpenAIChatCompletionExtra {
2728

@@ -44,9 +45,28 @@ object OpenAIChatCompletionExtra {
4445
)(
4546
implicit ec: ExecutionContext,
4647
scheduler: Scheduler
48+
): Future[ChatCompletionResponse] =
49+
createChatCompletionWithFailoverSettings(
50+
messages,
51+
settings,
52+
failoverModels.map(model => settings.copy(model = model)),
53+
maxRetries,
54+
retryOnAnyError,
55+
failureMessage
56+
)
57+
58+
def createChatCompletionWithFailoverSettings(
59+
messages: Seq[BaseMessage],
60+
settings: CreateChatCompletionSettings,
61+
failoverSettings: Seq[CreateChatCompletionSettings],
62+
maxRetries: Option[Int] = Some(defaultMaxRetries),
63+
retryOnAnyError: Boolean = false,
64+
failureMessage: String
65+
)(
66+
implicit ec: ExecutionContext,
67+
scheduler: Scheduler
4768
): Future[ChatCompletionResponse] = {
48-
val failoverSettings = failoverModels.map(model => settings.copy(model = model))
49-
val allSettingsInOrder = Seq(settings) ++ failoverSettings
69+
val allSettingsInOrder = settings +: failoverSettings
5070

5171
implicit val retrySettings: RetrySettings =
5272
RetrySettings(maxRetries = maxRetries.getOrElse(0))
@@ -152,7 +172,11 @@ object OpenAIChatCompletionExtra {
152172
s"${taskNameForLoggingFinal.capitalize} finished in " + (new java.util.Date().getTime - start.getTime) + " ms."
153173
)
154174

155-
json.as[T]
175+
json.asOpt[T].getOrElse(
176+
throw new OpenAIScalaClientException(
177+
s"Failed to parse JSON response into the expected type. Response: $contentJson"
178+
)
179+
)
156180
}
157181
}
158182

@@ -181,6 +205,13 @@ object OpenAIChatCompletionExtra {
181205
}
182206

183207
private val defaultModelsSupportingJsonSchema = Seq(
208+
ModelId.gpt_5,
209+
ModelId.gpt_5_2025_08_07,
210+
ModelId.gpt_5_mini,
211+
ModelId.gpt_5_mini_2025_08_07,
212+
ModelId.gpt_5_nano,
213+
ModelId.gpt_5_nano_2025_08_07,
214+
ModelId.gpt_5_chat_latest,
184215
ModelId.gpt_4_1,
185216
ModelId.gpt_4_1_2025_04_14,
186217
ModelId.gpt_4_1_mini,
@@ -194,6 +225,8 @@ object OpenAIChatCompletionExtra {
194225
ModelId.gpt_4o_2024_11_20,
195226
ModelId.o4_mini,
196227
ModelId.o4_mini_2025_04_16,
228+
ModelId.o3_pro,
229+
ModelId.o3_pro_2025_06_10,
197230
ModelId.o3,
198231
ModelId.o3_2025_04_16,
199232
ModelId.o3_mini,
@@ -203,9 +236,12 @@ object OpenAIChatCompletionExtra {
203236
ModelId.o1_2024_12_17,
204237
ModelId.o1_pro,
205238
ModelId.o1_pro_2025_03_19,
239+
NonOpenAIModelId.gemini_2_5_pro,
240+
NonOpenAIModelId.gemini_2_5_pro_preview_06_05,
206241
NonOpenAIModelId.gemini_2_5_pro_preview_05_06,
207242
NonOpenAIModelId.gemini_2_5_pro_preview_03_25,
208243
NonOpenAIModelId.gemini_2_5_pro_exp_03_25,
244+
NonOpenAIModelId.gemini_2_5_flash,
209245
NonOpenAIModelId.gemini_2_5_flash_preview_05_20,
210246
NonOpenAIModelId.gemini_2_5_flash_preview_04_17,
211247
NonOpenAIModelId.gemini_2_5_flash_preview_04_17_thinking,

openai-core/src/main/scala/io/cequence/openaiscala/service/adapter/ChatCompletionSettingsConversions.scala

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import io.cequence.openaiscala.domain.settings.{
77
}
88
import io.cequence.openaiscala.domain.settings.GroqCreateChatCompletionSettingsOps._
99
import org.slf4j.LoggerFactory
10+
import io.cequence.openaiscala.domain.settings.Verbosity
1011

1112
object ChatCompletionSettingsConversions {
1213

@@ -17,24 +18,25 @@ object ChatCompletionSettingsConversions {
1718
case class FieldConversionDef(
1819
doConversion: CreateChatCompletionSettings => Boolean,
1920
convert: CreateChatCompletionSettings => CreateChatCompletionSettings,
20-
loggingMessage: Option[String],
21+
loggingMessage: Option[CreateChatCompletionSettings => String],
2122
warning: Boolean = false
2223
)
2324

2425
def generic(
2526
fieldConversions: Seq[FieldConversionDef]
2627
): SettingsConversion = (settings: CreateChatCompletionSettings) =>
2728
fieldConversions.foldLeft(settings) {
28-
case (acc, FieldConversionDef(isDefined, convert, loggingMessage, warning)) =>
29+
case (acc, FieldConversionDef(isDefined, convert, maybeLoggingMessage, warning)) =>
2930
if (isDefined(acc)) {
30-
loggingMessage.foreach(message =>
31+
maybeLoggingMessage.foreach { messageFun =>
32+
val message = messageFun(acc)
3133
if (warning) logger.warn(message) else logger.debug(message)
32-
)
34+
}
3335
convert(acc)
3436
} else acc
3537
}
3638

37-
private val oBaseConversions = Seq(
39+
private lazy val newAPIConversions = Seq(
3840
// max tokens
3941
FieldConversionDef(
4042
_.max_tokens.isDefined,
@@ -44,68 +46,85 @@ object ChatCompletionSettingsConversions {
4446
extra_params =
4547
settings.extra_params + ("max_completion_tokens" -> settings.max_tokens.get)
4648
),
47-
Some("O models don't support max_tokens, converting to max_completion_tokens")
49+
Some(settings =>
50+
s"${settings.model} model doesn't support max_tokens, converting to max_completion_tokens"
51+
)
4852
),
4953
// temperature
5054
FieldConversionDef(
5155
settings => settings.temperature.isDefined && settings.temperature.get != 1,
5256
_.copy(temperature = Some(1d)),
53-
Some(
54-
"O models don't support temperature values other than the default of 1, converting to 1."
57+
Some(settings =>
58+
s"${settings.model} model doesn't support temperature values other than the default of 1, converting to 1."
5559
),
5660
warning = true
5761
),
5862
// top_p
5963
FieldConversionDef(
6064
settings => settings.top_p.isDefined && settings.top_p.get != 1,
6165
_.copy(top_p = Some(1d)),
62-
Some(
63-
"O models don't support top p values other than the default of 1, converting to 1."
66+
Some(settings =>
67+
s"${settings.model} model doesn't support top p values other than the default of 1, converting to 1."
6468
),
6569
warning = true
6670
),
6771
// presence_penalty
6872
FieldConversionDef(
6973
settings => settings.presence_penalty.isDefined && settings.presence_penalty.get != 0,
7074
_.copy(presence_penalty = Some(0d)),
71-
Some(
72-
"O models don't support presence penalty values other than the default of 0, converting to 0."
75+
Some(settings =>
76+
s"${settings.model} model doesn't support presence penalty values other than the default of 0, converting to 0."
7377
),
7478
warning = true
7579
),
7680
// frequency_penalty
7781
FieldConversionDef(
7882
settings => settings.frequency_penalty.isDefined && settings.frequency_penalty.get != 0,
7983
_.copy(frequency_penalty = Some(0d)),
80-
Some(
81-
"O models don't support frequency penalty values other than the default of 0, converting to 0."
82-
),
83-
warning = true
84-
),
85-
// parallel_tool_calls
86-
FieldConversionDef(
87-
settings => settings.parallel_tool_calls.isDefined,
88-
_.copy(parallel_tool_calls = None),
89-
Some(
90-
"O models don't support parallel tool calls, converting to None."
84+
Some(settings =>
85+
s"${settings.model} model doesn't support frequency penalty values other than the default of 0, converting to 0."
9186
),
9287
warning = true
9388
)
9489
)
9590

91+
private lazy val oBaseConversions =
92+
newAPIConversions ++ Seq(
93+
// parallel_tool_calls
94+
FieldConversionDef(
95+
settings => settings.parallel_tool_calls.isDefined,
96+
_.copy(parallel_tool_calls = None),
97+
Some(settings =>
98+
s"${settings.model} model doesn't support parallel tool calls, converting to None."
99+
),
100+
warning = true
101+
),
102+
// verbosity
103+
FieldConversionDef(
104+
settings => settings.verbosity.isDefined && settings.verbosity.get != Verbosity.medium,
105+
_.copy(verbosity = None),
106+
Some(settings =>
107+
s"${settings.model} model doesn't support verbosity values other than 'medium', converting to None."
108+
),
109+
warning = true
110+
)
111+
)
112+
96113
private val o1PreviewConversions =
97114
oBaseConversions :+
98115
// response format type
99116
FieldConversionDef(
100117
settings =>
101118
settings.response_format_type.isDefined && settings.response_format_type.get != ChatCompletionResponseFormatType.text,
102119
_.copy(response_format_type = None),
103-
Some(
104-
"O1 (preview) models don't support json object/schema response format, converting to None."
120+
Some(settings =>
121+
s"O1 (preview) model ${settings.model} doesn't support json object/schema response format, converting to None."
105122
),
106123
warning = true
107124
)
108125

126+
val gpt5: SettingsConversion = generic(newAPIConversions)
127+
109128
val o: SettingsConversion = generic(oBaseConversions)
110129

111130
val o1Preview: SettingsConversion = generic(o1PreviewConversions)
@@ -125,8 +144,8 @@ object ChatCompletionSettingsConversions {
125144
) && settings.max_tokens.isDefined,
126145
settings =>
127146
settings.copy(max_tokens = None).setMaxCompletionTokens(settings.max_tokens.get),
128-
Some(
129-
"Groq deepseek R1 model doesn't support max_tokens, converting to max_completion_tokens."
147+
Some(settings =>
148+
s"Groq deepseek R1 model ${settings.model} doesn't support max_tokens, converting to max_completion_tokens."
130149
)
131150
),
132151
// reasoning format
@@ -140,8 +159,8 @@ object ChatCompletionSettingsConversions {
140159
)
141160
) && reasoningFormat.isDefined,
142161
_.setReasoningFormat(reasoningFormat.get),
143-
Some(
144-
s"Setting reasoning format '${reasoningFormat.get}' for Groq deepseek R1 model."
162+
Some(settings =>
163+
s"Setting reasoning format '${reasoningFormat.get}' for Groq deepseek R1 model ${settings.model}."
145164
)
146165
)
147166
)

0 commit comments

Comments
 (0)