Skip to content

Commit 54ccae8

Browse files
authored
Merge pull request #1518 from shane04111/add_local_tts_change_voice
update: 添加本地tts支援面板的"角色音色"
2 parents 43d47a4 + 3015b01 commit 54ccae8

File tree

11 files changed

+87
-19
lines changed

main/manager-api/src/main/java/xiaozhi/modules/config/service/impl/ConfigServiceImpl.java

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ public Object getConfig(Boolean isCache) {
7272
null,
7373
null,
7474
null,
75+
null,
76+
null,
7577
agent.getVadModelId(),
7678
agent.getAsrModelId(),
7779
null,
@@ -108,9 +110,13 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
108110
}
109111
// 获取音色信息
110112
String voice = null;
113+
String referenceAudio = null;
114+
String referenceText = null;
111115
TimbreDetailsVO timbre = timbreService.get(agent.getTtsVoiceId());
112116
if (timbre != null) {
113117
voice = timbre.getTtsVoice();
118+
referenceAudio = timbre.getReferenceAudio();
119+
referenceText = timbre.getReferenceText();
114120
}
115121
// 构建返回数据
116122
Map<String, Object> result = new HashMap<>();
@@ -163,6 +169,8 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
163169
agent.getSystemPrompt(),
164170
agent.getSummaryMemory(),
165171
voice,
172+
referenceAudio,
173+
referenceText,
166174
agent.getVadModelId(),
167175
agent.getAsrModelId(),
168176
agent.getLlmModelId(),
@@ -179,7 +187,7 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
179187
/**
180188
* 构建配置信息
181189
*
182-
* @param paramsList 系统参数列表
190+
* @param config 系统参数列表
183191
* @return 配置信息
184192
*/
185193
private Object buildConfig(Map<String, Object> config) {
@@ -250,21 +258,25 @@ private Object buildConfig(Map<String, Object> config) {
250258
/**
251259
* 构建模块配置
252260
*
253-
* @param prompt 提示词
254-
* @param voice 音色
255-
* @param vadModelId VAD模型ID
256-
* @param asrModelId ASR模型ID
257-
* @param llmModelId LLM模型ID
258-
* @param ttsModelId TTS模型ID
259-
* @param memModelId 记忆模型ID
260-
* @param intentModelId 意图模型ID
261-
* @param result 结果Map
261+
* @param prompt 提示词
262+
* @param voice 音色
263+
* @param referenceAudio 参考音频路径
264+
* @param referenceText 参考文本
265+
* @param vadModelId VAD模型ID
266+
* @param asrModelId ASR模型ID
267+
* @param llmModelId LLM模型ID
268+
* @param ttsModelId TTS模型ID
269+
* @param memModelId 记忆模型ID
270+
* @param intentModelId 意图模型ID
271+
* @param result 结果Map
262272
*/
263273
private void buildModuleConfig(
264274
String assistantName,
265275
String prompt,
266276
String summaryMemory,
267277
String voice,
278+
String referenceAudio,
279+
String referenceText,
268280
String vadModelId,
269281
String asrModelId,
270282
String llmModelId,
@@ -290,8 +302,10 @@ private void buildModuleConfig(
290302
if (model.getConfigJson() != null) {
291303
typeConfig.put(model.getId(), model.getConfigJson());
292304
// 如果是TTS类型,添加private_voice属性
293-
if ("TTS".equals(modelTypes[i]) && voice != null) {
294-
((Map<String, Object>) model.getConfigJson()).put("private_voice", voice);
305+
if ("TTS".equals(modelTypes[i])){
306+
if (voice != null) ((Map<String, Object>) model.getConfigJson()).put("private_voice", voice);
307+
if (referenceAudio != null) ((Map<String, Object>) model.getConfigJson()).put("ref_audio", referenceAudio);
308+
if (referenceText != null) ((Map<String, Object>) model.getConfigJson()).put("ref_text", referenceText);
295309
}
296310
// 如果是Intent类型,且type=intent_llm,则给他添加附加模型
297311
if ("Intent".equals(modelTypes[i])) {

main/manager-api/src/main/java/xiaozhi/modules/timbre/dto/TimbreDataDTO.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ public class TimbreDataDTO {
2626
@Schema(description = "备注")
2727
private String remark;
2828

29+
@Schema(description = "参考音频路径")
30+
private String referenceAudio;
31+
32+
@Schema(description = "參考文本")
33+
private String referenceText;
34+
2935
@Schema(description = "排序")
3036
@Min(value = 0, message = "{sort.number}")
3137
private long sort;

main/manager-api/src/main/java/xiaozhi/modules/timbre/entity/TimbreEntity.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ public class TimbreEntity {
3434
@Schema(description = "备注")
3535
private String remark;
3636

37+
@Schema(description = "参考音频路径")
38+
private String referenceAudio;
39+
40+
@Schema(description = "參考文本")
41+
private String referenceText;
42+
3743
@Schema(description = "排序")
3844
private long sort;
3945

main/manager-api/src/main/java/xiaozhi/modules/timbre/vo/TimbreDetailsVO.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ public class TimbreDetailsVO implements Serializable {
2525
@Schema(description = "备注")
2626
private String remark;
2727

28+
@Schema(description = "参考音频路径")
29+
private String referenceAudio;
30+
31+
@Schema(description = "參考文本")
32+
private String referenceText;
33+
2834
@Schema(description = "排序")
2935
private long sort;
3036

main/manager-api/src/main/resources/db/changelog/202506091720.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE `ai_tts_voice`
2+
ADD COLUMN `reference_audio` VARCHAR(500) DEFAULT NULL COMMENT '参考音频路径' AFTER `remark`,
3+
ADD COLUMN `reference_text` VARCHAR(500) DEFAULT NULL COMMENT '参考文本' AFTER `reference_audio`;

main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,13 @@ databaseChangeLog:
205205
- sqlFile:
206206
encoding: utf8
207207
path: classpath:db/changelog/202506080955.sql
208+
- changeSet:
209+
id: 202506091720
210+
author: shane0411
211+
changes:
212+
- sqlFile:
213+
encoding: utf8
214+
path: classpath:db/changelog/202506091720.sql
208215
- changeSet:
209216
id: 202506161101
210217
author: hrz
@@ -232,4 +239,4 @@ databaseChangeLog:
232239
changes:
233240
- sqlFile:
234241
encoding: utf8
235-
path: classpath:db/changelog/202506261637.sql
242+
path: classpath:db/changelog/202506261637.sql

main/manager-web/src/apis/module/timbre.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ export default {
3434
languages: params.languageType,
3535
name: params.voiceName,
3636
remark: params.remark,
37+
referenceAudio: params.referenceAudio,
38+
referenceText: params.referenceText,
3739
sort: params.sort,
3840
ttsModelId: params.ttsModelId,
3941
ttsVoice: params.voiceCode,
@@ -75,6 +77,8 @@ export default {
7577
languages: params.languageType,
7678
name: params.voiceName,
7779
remark: params.remark,
80+
referenceAudio: params.referenceAudio,
81+
referenceText: params.referenceText,
7882
ttsModelId: params.ttsModelId,
7983
ttsVoice: params.voiceCode,
8084
voiceDemo: params.voiceDemo || ''

main/manager-web/src/components/TtsModel.vue

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,18 @@
7474
<span v-else>{{ scope.row.remark }}</span>
7575
</template>
7676
</el-table-column>
77+
<el-table-column label="音频路径" align="center">
78+
<template slot-scope="scope">
79+
<el-input v-if="scope.row.editing" v-model="scope.row.referenceAudio" placeholder="这里是参考音频路径 (本地模型用)"></el-input>
80+
<span v-else>{{ scope.row.referenceAudio }}</span>
81+
</template>
82+
</el-table-column>
83+
<el-table-column label="音频文本" align="center">
84+
<template slot-scope="scope">
85+
<el-input v-if="scope.row.editing" v-model="scope.row.referenceText" placeholder="这里是参考音频文本 (本地模型用)"></el-input>
86+
<span v-else>{{ scope.row.referenceText }}</span>
87+
</template>
88+
</el-table-column>
7789
<el-table-column label="操作" align="center" width="150">
7890
<template slot-scope="scope">
7991
<template v-if="!scope.row.editing">
@@ -206,6 +218,8 @@ export default {
206218
voiceName: item.name || '未命名音色',
207219
languageType: item.languages || '',
208220
remark: item.remark || '',
221+
referenceAudio: item.referenceAudio || '',
222+
referenceText: item.referenceText || '',
209223
voiceDemo: item.voiceDemo || '',
210224
selected: false,
211225
editing: false,
@@ -351,6 +365,8 @@ export default {
351365
voiceName: row.voiceName,
352366
languageType: row.languageType,
353367
remark: row.remark,
368+
referenceAudio: row.referenceAudio,
369+
referenceText: row.referenceText,
354370
ttsModelId: this.ttsModelId,
355371
voiceDemo: row.voiceDemo || '',
356372
sort: row.sort
@@ -432,6 +448,8 @@ export default {
432448
languageType: '中文',
433449
voiceDemo: '',
434450
remark: '',
451+
referenceAudio: '',
452+
referenceText: '',
435453
selected: false,
436454
editing: true,
437455
sort: maxSort + 1

main/xiaozhi-server/core/providers/tts/fishspeech.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,12 @@ def __init__(self, config, delete_audio_file):
8585
self.reference_id = (
8686
None if not config.get("reference_id") else config.get("reference_id")
8787
)
88-
self.reference_audio = parse_string_to_list(config.get("reference_audio"))
89-
self.reference_text = parse_string_to_list(config.get("reference_text"))
88+
self.reference_audio = parse_string_to_list(
89+
config.get('ref_audio')if config.get('ref_audio') else config.get("reference_audio")
90+
)
91+
self.reference_text = parse_string_to_list(
92+
config.get('ref_text')if config.get('ref_text') else config.get("reference_text")
93+
)
9094
self.format = config.get("response_format", "wav")
9195
self.audio_file_type = config.get("response_format", "wav")
9296
self.api_key = config.get("api_key", "YOUR_API_KEY")

main/xiaozhi-server/core/providers/tts/gpt_sovits_v2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ def __init__(self, config, delete_audio_file):
1212
super().__init__(config, delete_audio_file)
1313
self.url = config.get("url")
1414
self.text_lang = config.get("text_lang", "zh")
15-
self.ref_audio_path = config.get("ref_audio_path")
16-
self.prompt_text = config.get("prompt_text")
15+
self.ref_audio_path = config.get('ref_audio') if config.get('ref_audio') else config.get("ref_audio_path")
16+
self.prompt_text = config.get('ref_text') if config.get('ref_text') else config.get("prompt_text")
1717
self.prompt_lang = config.get("prompt_lang", "zh")
1818

1919
# 处理空字符串的情况

0 commit comments

Comments
 (0)