Skip to content

Commit 5e2814a

Browse files
authored
Merge pull request #71 from big-mouth-cn/feature/20241105-support-4o-audio
chat completion api support new model: gpt-4o-audio-preview
2 parents d976531 + cfb53af commit 5e2814a

File tree

17 files changed

+225
-19
lines changed

17 files changed

+225
-19
lines changed

README-zh.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@ OpenAi4J是一个非官方的Java库,旨在帮助java开发者与OpenAI的GPT
2121
## 导入依赖
2222
### Gradle
2323

24-
`implementation 'io.github.lambdua:<api|client|service>:0.22.4'`
24+
`implementation 'io.github.lambdua:<api|client|service>:0.22.5'`
2525
### Maven
2626
```xml
2727

2828
<dependency>
2929
<groupId>io.github.lambdua</groupId>
3030
<artifactId>service</artifactId>
31-
<version>0.22.4</version>
31+
<version>0.22.5</version>
3232
</dependency>
3333
```
3434

@@ -61,7 +61,7 @@ static void simpleChat() {
6161
<dependency>
6262
<groupId>io.github.lambdua</groupId>
6363
<artifactId>api</artifactId>
64-
<version>0.22.4</version>
64+
<version>0.22.5</version>
6565
</dependency>
6666
```
6767

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ applications effortlessly.
2525
## Import
2626
### Gradle
2727

28-
`implementation 'io.github.lambdua:<api|client|service>:0.22.4'`
28+
`implementation 'io.github.lambdua:<api|client|service>:0.22.5'`
2929
### Maven
3030
```xml
3131

3232
<dependency>
3333
<groupId>io.github.lambdua</groupId>
3434
<artifactId>service</artifactId>
35-
<version>0.22.4</version>
35+
<version>0.22.5</version>
3636
</dependency>
3737
```
3838

@@ -67,7 +67,7 @@ To utilize pojos, import the api module:
6767
<dependency>
6868
<groupId>io.github.lambdua</groupId>
6969
<artifactId>api</artifactId>
70-
<version>0.22.4</version>
70+
<version>0.22.5</version>
7171
</dependency>
7272
```
7373

api/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>io.github.lambdua</groupId>
88
<artifactId>openai-java</artifactId>
9-
<version>0.22.4</version>
9+
<version>0.22.5</version>
1010
</parent>
1111
<packaging>jar</packaging>
1212
<artifactId>api</artifactId>

api/src/main/java/com/theokanning/openai/completion/chat/AssistantMessage.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
package com.theokanning.openai.completion.chat;
22

3-
import java.util.List;
4-
53
import com.fasterxml.jackson.annotation.JsonIgnore;
64
import com.fasterxml.jackson.annotation.JsonProperty;
75
import com.theokanning.openai.utils.JsonUtil;
8-
96
import lombok.AllArgsConstructor;
107
import lombok.Data;
118
import lombok.NoArgsConstructor;
129

10+
import java.util.List;
11+
1312
/**
1413
* @author LiangTao
1514
* @date 2024-04-10 10:31
@@ -41,6 +40,10 @@ public class AssistantMessage implements ChatMessage {
4140
*/
4241
private String refusal;
4342

43+
/**
44+
* Data about a previous audio response from the model.
45+
*/
46+
private AssistantMessageAudio audio;
4447

4548

4649
public AssistantMessage(String content) {
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package com.theokanning.openai.completion.chat;
2+
3+
import com.fasterxml.jackson.annotation.JsonProperty;
4+
import lombok.AllArgsConstructor;
5+
import lombok.Data;
6+
import lombok.NoArgsConstructor;
7+
import lombok.NonNull;
8+
9+
/**
10+
* @author Allen Hu
11+
* @date 2024/11/6
12+
*/
13+
@Data
14+
@NoArgsConstructor
15+
@AllArgsConstructor
16+
class AssistantMessageAudio {
17+
18+
/**
19+
* Unique identifier for a previous audio response from the model.
20+
*/
21+
@NonNull
22+
private String id;
23+
24+
/**
25+
* The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
26+
*/
27+
@JsonProperty("expires_at")
28+
private Integer expiresAt;
29+
30+
/**
31+
* Transcript of the audio generated by the model.
32+
*/
33+
private String transcript;
34+
35+
/**
36+
* Base64 encoded audio bytes generated by the model, in the format specified in the request.
37+
*/
38+
private String data;
39+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.theokanning.openai.completion.chat;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
7+
/**
8+
* Parameters for audio output. Required when audio output is requested with modalities: ["audio"].
9+
*
10+
* @author Allen Hu
11+
* @date 2024/11/5
12+
*/
13+
@Data
14+
@NoArgsConstructor
15+
@AllArgsConstructor
16+
public class Audio {
17+
18+
/**
19+
* The voice the model uses to respond. Supported voices are alloy, ash, ballad, coral, echo, sage, shimmer, and verse.
20+
*/
21+
String voice;
22+
23+
/**
24+
* Specifies the output audio format. Must be one of wav, mp3, flac, opus, or pcm16.
25+
*/
26+
String format;
27+
}

api/src/main/java/com/theokanning/openai/completion/chat/ChatCompletionRequest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,5 +168,18 @@ public class ChatCompletionRequest {
168168
@JsonProperty("parallel_tool_calls")
169169
Boolean parallelToolCalls;
170170

171+
/**
172+
* Output types that you would like the model to generate for this request. Most models are capable of generating text, which is the default:
173+
* ["text"]
174+
* The gpt-4o-audio-preview model can also be used to generate audio. To request that this model generate both text and audio responses, you can use:
175+
* ["text", "audio"]
176+
*
177+
* @see <a href="https://platform.openai.com/docs/api-reference/chat/create#chat-create-modalities">OpenAI API reference: chat create, modalities</a>
178+
*/
179+
List<String> modalities;
171180

181+
/**
182+
* Parameters for audio output. Required when audio output is requested with modalities: ["audio"].
183+
*/
184+
Audio audio;
172185
}

api/src/main/java/com/theokanning/openai/completion/chat/ContentDeserializer.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ ImageContent parseContent(JsonParser jsonParser) throws IOException {
4949
content.setImageUrl(parseImageUrl(jsonParser));
5050
} else if ("image_file".equals(fieldName)) {
5151
content.setImageFile(parseImageFile(jsonParser));
52+
} else if ("input_audio".equals(fieldName)) {
53+
content.setInputAudio(parseInputAudio(jsonParser));
5254
}
5355
}
5456
return content;
@@ -83,4 +85,19 @@ private ImageUrl parseImageUrl(JsonParser jsonParser) throws IOException {
8385
}
8486
return new ImageUrl(url, detail);
8587
}
88+
89+
private InputAudio parseInputAudio(JsonParser jsonParser) throws IOException {
90+
String data = null;
91+
String format = null;
92+
while (jsonParser.nextToken() != JsonToken.END_OBJECT) {
93+
String fieldName = jsonParser.getCurrentName();
94+
jsonParser.nextToken();
95+
if ("data".equals(fieldName)) {
96+
data = jsonParser.getText();
97+
} else if ("format".equals(fieldName)) {
98+
format = jsonParser.getText();
99+
}
100+
}
101+
return new InputAudio(data, format);
102+
}
86103
}

api/src/main/java/com/theokanning/openai/completion/chat/ContentSerializer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ public void serialize(Object o, JsonGenerator jsonGenerator, SerializerProvider
3535
if (ic.getType().equals("image_file")) {
3636
jsonGenerator.writeObjectField("image_file", ic.getImageFile());
3737
}
38+
if (ic.getType().equals("input_audio")) {
39+
jsonGenerator.writeObjectField("input_audio", ic.getInputAudio());
40+
}
3841
jsonGenerator.writeEndObject();
3942
}
4043
jsonGenerator.writeEndArray();

api/src/main/java/com/theokanning/openai/completion/chat/ImageContent.java

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
public class ImageContent {
2424

2525
/**
26-
* The type of the content. Either "text" or "image_url".
26+
* The type of the content. One of "text", "image_url", "image_file" or "input_audio".
2727
*/
2828
@NonNull
2929
private String type;
@@ -39,6 +39,10 @@ public class ImageContent {
3939
@JsonProperty("image_file")
4040
private ImageFile imageFile;
4141

42+
@JsonInclude(JsonInclude.Include.NON_NULL)
43+
@JsonProperty("input_audio")
44+
private InputAudio inputAudio;
45+
4246

4347
public ImageContent(String text) {
4448
this.type = "text";
@@ -50,14 +54,42 @@ public ImageContent(ImageUrl imageUrl) {
5054
this.imageUrl = imageUrl;
5155
}
5256

57+
/**
58+
* @deprecated {@link #ofImagePath(Path)}
59+
*/
60+
@Deprecated
5361
public ImageContent(Path imagePath){
5462
this.type = "image_url";
5563
String imagePathString = imagePath.toAbsolutePath().toString();
5664
String extension = imagePathString.substring(imagePathString.lastIndexOf('.') + 1);
5765
this.imageUrl=new ImageUrl( "data:image/" + extension + ";base64," + encodeImage(imagePath));
5866
}
5967

60-
private String encodeImage(Path imagePath) {
68+
public ImageContent(InputAudio inputAudio) {
69+
this.type = "input_audio";
70+
this.inputAudio = inputAudio;
71+
}
72+
73+
public static ImageContent ofImagePath(Path imagePath){
74+
String imagePathString = imagePath.toAbsolutePath().toString();
75+
String extension = imagePathString.substring(imagePathString.lastIndexOf('.') + 1);
76+
ImageUrl imageUrl = new ImageUrl("data:image/" + extension + ";base64," + encode2base64(imagePath));
77+
return new ImageContent(imageUrl);
78+
}
79+
80+
public static ImageContent ofAudioPath(Path inputAudioPath) {
81+
String inputAudioPathString = inputAudioPath.toAbsolutePath().toString();
82+
String extension = inputAudioPathString.substring(inputAudioPathString.lastIndexOf('.') + 1);
83+
String base64 = encode2base64(inputAudioPath);
84+
InputAudio inputAudio = new InputAudio(base64, extension);
85+
return new ImageContent(inputAudio);
86+
}
87+
88+
/**
89+
* @deprecated use {@link #encode2base64(Path)}
90+
*/
91+
@Deprecated
92+
private static String encodeImage(Path imagePath) {
6193
byte[] fileContent;
6294
try {
6395
fileContent = Files.readAllBytes(imagePath);
@@ -67,4 +99,13 @@ private String encodeImage(Path imagePath) {
6799
}
68100
}
69101

102+
private static String encode2base64(Path path) {
103+
byte[] fileContent;
104+
try {
105+
fileContent = Files.readAllBytes(path);
106+
return Base64.getEncoder().encodeToString(fileContent);
107+
} catch (IOException e) {
108+
throw new RuntimeException(e);
109+
}
110+
}
70111
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package com.theokanning.openai.completion.chat;
2+
3+
import lombok.AllArgsConstructor;
4+
import lombok.Data;
5+
import lombok.NoArgsConstructor;
6+
import lombok.NonNull;
7+
8+
/**
9+
* @author Allen Hu
10+
* @date 2024/11/6
11+
*/
12+
@Data
13+
@NoArgsConstructor
14+
@AllArgsConstructor
15+
public class InputAudio {
16+
17+
/**
18+
* Base64 encoded audio data.
19+
*/
20+
@NonNull
21+
private String data;
22+
23+
/**
24+
* The format of the encoded audio data. Currently supports "wav" and "mp3".
25+
*/
26+
@NonNull
27+
private String format;
28+
}

api/src/main/java/com/theokanning/openai/completion/chat/UserMessage.java

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,23 @@ public static UserMessage buildImageMessage(String prompt, String... imageUrls)
8383
* @return com.theokanning.openai.completion.chat.UserMessage
8484
**/
8585
public static UserMessage buildImageMessage(String prompt, Path... imagePaths) {
86-
List<ImageContent> imageContents = Arrays.stream(imagePaths).map(ImageContent::new).collect(Collectors.toList());
86+
List<ImageContent> imageContents = Arrays.stream(imagePaths).map(ImageContent::ofImagePath).collect(Collectors.toList());
8787
imageContents.add(0, new ImageContent(prompt));
8888
return new UserMessage(imageContents);
8989
}
9090

91-
91+
/**
92+
* Builds an audio-recognition request message; multiple audio files are supported.
93+
* @param prompt query text
94+
* @param inputAudioPaths local paths of the audio files
95+
* @return com.theokanning.openai.completion.chat.UserMessage
96+
* @author Allen Hu
97+
* @date 2024/11/6
98+
*/
99+
public static UserMessage buildInputAudioMessage(String prompt, Path... inputAudioPaths) {
100+
List<ImageContent> imageContents = Arrays.stream(inputAudioPaths).map(ImageContent::ofAudioPath).collect(Collectors.toList());
101+
imageContents.add(0, new ImageContent(prompt));
102+
return new UserMessage(imageContents);
103+
}
92104
}
93105

client/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>io.github.lambdua</groupId>
88
<artifactId>openai-java</artifactId>
9-
<version>0.22.4</version>
9+
<version>0.22.5</version>
1010
</parent>
1111
<packaging>jar</packaging>
1212

example/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>io.github.lambdua</groupId>
88
<artifactId>example</artifactId>
9-
<version>0.22.4</version>
9+
<version>0.22.5</version>
1010
<name>example</name>
1111

1212
<properties>
@@ -17,7 +17,7 @@
1717
<dependency>
1818
<groupId>io.github.lambdua</groupId>
1919
<artifactId>service</artifactId>
20-
<version>0.22.4</version>
20+
<version>0.22.5</version>
2121
</dependency>
2222

2323
</dependencies>

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
<groupId>io.github.lambdua</groupId>
77
<artifactId>openai-java</artifactId>
8-
<version>0.22.4</version>
8+
<version>0.22.5</version>
99
<packaging>pom</packaging>
1010
<description>openai java 版本</description>
1111
<url>https://github.com/Lambdua/openai-java</url>

service/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<parent>
77
<groupId>io.github.lambdua</groupId>
88
<artifactId>openai-java</artifactId>
9-
<version>0.22.4</version>
9+
<version>0.22.5</version>
1010
</parent>
1111
<packaging>jar</packaging>
1212

0 commit comments

Comments
 (0)