Android&iOS&Windows&Mac

最近更新时间:2026-05-20 17:53:30

我的收藏

功能描述

文字转语音(TTS)和音色克隆是即时通信 IM SDK 提供的 AI 能力,通过 callExperimentalAPI (Java / Swift / Objective-C / C++) 实验性接口调用。主要包含以下功能:
文字转语音(TTS):将文本内容转换为语音音频文件,支持指定音色和音频格式。
音色克隆:基于一段参考音频,克隆出自定义音色,后续可在 TTS 中使用。
自定义音色列表拉取:获取当前用户已克隆的所有自定义音色。
自定义音色删除:删除不再需要的克隆音色。
说明:
文字转语音及音色克隆是增值付费功能,目前正在内测阶段。试用需通过工单申请开通。
音色克隆、自定义音色列表拉取和自定义音色删除功能在 8.9 及以上版本支持。
所有功能均通过 callExperimentalAPI 接口调用,各平台统一使用 JSON 参数传递。
文字转语音功能在 8.9 及以上版本支持。

效果展示

文字转语音
音色克隆



接口说明

文字转语音(convertTextToVoice)

将文本内容转换为语音音频,返回音频文件的下载 URL。
请求参数:
参数
类型
必填
说明
text
String
是
需要转语音的文字内容(长度 1-255)。
voiceId
String
否
TTS 音色 ID,可使用系统预置音色或克隆的自定义音色 ID。
audioFormat
String
否
音频格式,支持 pcm、wav。
language
String
否
语言代码(参考 ISO 639-1),如 zh、en 等。
返回参数:
参数
类型
说明
audioUrl
String
生成的音频文件下载地址。
requestId
String
唯一请求 ID。
示例代码:
Java
Swift
C++
/**
 * Requests text-to-speech conversion through the experimental API and logs the outcome.
 *
 * @param text        text to convert; always sent as the "text" field
 * @param voiceId     optional voice ID; left out of the request when null or empty
 * @param audioFormat optional audio format; left out of the request when null or empty
 */
public void convertTextToVoice(String text, String voiceId, String audioFormat) {
    try {
        JSONObject request = new JSONObject();
        request.put("text", text);

        // Optional fields are attached only when the caller passed a non-empty value.
        boolean voiceIdMissing = voiceId == null || voiceId.isEmpty();
        if (!voiceIdMissing) {
            request.put("voiceId", voiceId);
        }
        boolean audioFormatMissing = audioFormat == null || audioFormat.isEmpty();
        if (!audioFormatMissing) {
            request.put("audioFormat", audioFormat);
        }

        String payload = request.toString();

        V2TIMManager.getInstance().callExperimentalAPI(
            "convertTextToVoice",
            payload,
            new V2TIMValueCallback<Object>() {
                @Override
                public void onSuccess(Object result) {
                    // Any result that is not a HashMap is ignored.
                    if (!(result instanceof HashMap)) {
                        return;
                    }
                    HashMap<String, Object> fields = (HashMap<String, Object>) result;
                    String audioUrl = (String) fields.get("audioUrl");
                    String requestId = (String) fields.get("requestId");
                    Log.i("TTS", "Success - audio_url: " + audioUrl + ", request_id: " + requestId);
                }

                @Override
                public void onError(int code, String desc) {
                    Log.e("TTS", "Failed - code: " + code + ", message: " + desc);
                }
            }
        );
    } catch (Exception e) {
        Log.e("TTS", "Failed to serialize parameters: " + e.getMessage());
    }
}
/// Requests text-to-speech conversion through the experimental `convertTextToVoice` API
/// and prints the outcome.
///
/// - Parameters:
///   - text: Text to convert; always sent as the `text` field.
///   - voiceId: Optional voice ID; left out of the request when `nil` or empty.
///   - audioFormat: Optional audio format; left out of the request when `nil` or empty.
func convertTextToVoice(text: String, voiceId: String? = nil, audioFormat: String? = nil) {
    var params: [String: Any] = [
        "text": text
    ]

    // Optional parameters
    if let voiceId = voiceId, !voiceId.isEmpty {
        params["voiceId"] = voiceId
    }
    if let audioFormat = audioFormat, !audioFormat.isEmpty {
        params["audioFormat"] = audioFormat
    }

    guard let jsonData = try? JSONSerialization.data(withJSONObject: params),
          let jsonString = String(data: jsonData, encoding: .utf8) else {
        print("Failed to serialize parameters")
        return
    }

    V2TIMManager.sharedInstance().callExperimentalAPI(
        api: "convertTextToVoice",
        param: jsonString as NSObject,
        succ: { result in
            if let dict = result as? [String: Any] {
                let audioUrl = dict["audioUrl"] as? String ?? ""
                let requestId = dict["requestId"] as? String ?? ""
                // Single backslash so the values are interpolated rather than printed literally.
                print("Success - audio_url: \(audioUrl), request_id: \(requestId)")
            }
        },
        fail: { code, message in
            print("Failed - code: \(code), message: \(message ?? "")")
        }
    )
}
void ConvertTextToVoice() {
json::Object json_param;
json_param[kTIMRequestInternalOperation] = kTIMInternalOperationConvertTextToVoice;
// 需要转语音的文字内容(必填,长度 1-255)
json_param[kTIMRequestConvertTextToVoiceText] = "你好,这是一段测试文字转语音的内容";
// TTS 声音 ID(选填)
// json_param[kTIMRequestConvertTextToVoiceVoiceId] = "";
// 音频格式: pcm wav(选填)
json_param[kTIMRequestConvertTextToVoiceAudioFormat] = "wav";
// 语言代码(选填)
json_param[kTIMRequestConvertTextToVoiceLanguage] = "";

std::string json_parameter = json::Serialize(json_param);
int ret = callExperimentalAPI(json_parameter.c_str(),
[](int32_t code, const char* desc, const char* json_params, const void* user_data) {
printf("\\nConvertTextToVoice|code:%d|desc:%s\\n", code, desc);
if (code == 0 && json_params) {
json::Value json = json::Deserialize(json_params);
if (json.HasKey(kTIMResponseConvertTextToVoiceAudioUrl)) {
std::string audio_url = json[kTIMResponseConvertTextToVoiceAudioUrl];
Printf("audio_url: %s\\n", audio_url.c_str());
}
if (json.HasKey(kTIMResponseConvertTextToVoiceRequestId)) {
std::string request_id = json[kTIMResponseConvertTextToVoiceRequestId];
Printf("request_id: %s\\n", request_id.c_str());
}
}
}, nullptr);

Printf("ConvertTextToVoice|ret:%d\\n", ret);
}

音色克隆(voiceClone)

基于参考音频克隆出自定义音色,克隆成功后返回音色 ID,可用于后续 TTS 调用。
请求参数:
参数
类型
必填
说明
audioUrl
String
是
参考音频 URL(要求 16k 单声道 wav 格式,时长 10-18 秒)。
voiceName
String
是
声音克隆的名称。
promptText
String
否
参考音频对应的文本内容。
language
String
否
语言代码(参考 ISO 639-1)。
返回参数:
参数
类型
说明
voiceId
String
克隆出的音色 ID,可用于 TTS 接口的 voiceId 参数。
requestId
String
唯一请求 ID。
说明:
参考音频要求为 16k 采样率、单声道的 wav 格式文件,时长建议在 10-18 秒之间。
音频需要先上传获取 URL(可通过 SDK 的文件上传接口获取),再传入 audioUrl。
示例代码:
Java
Swift
C++
// Build the voiceClone request: reference audio URL, voice name and prompt text.
JSONObject params = new JSONObject();
params.put("audioUrl", "https://example.com/voice.wav");
params.put("voiceName", "MyVoice");
params.put("promptText", ""); // optional

V2TIMManager.getInstance().callExperimentalAPI("voiceClone", params.toString(),
    new V2TIMValueCallback<Object>() {
        @Override
        public void onSuccess(Object result) {
            // Check the type before casting so an unexpected result is logged
            // instead of raising ClassCastException inside the callback.
            if (!(result instanceof String)) {
                Log.e("TAG", "parse error");
                return;
            }
            try {
                JSONObject resultJson = new JSONObject((String) result);
                String voiceId = resultJson.optString("voiceId");
                Log.d("TAG", "voiceId: " + voiceId);
            } catch (JSONException e) {
                Log.e("TAG", "parse error");
            }
        }

        @Override
        public void onError(int code, String desc) {
            Log.e("TAG", "error: " + code + ", " + desc);
        }
    }
);
// Build the voiceClone request: reference audio URL, voice name and prompt text.
let params: [String: Any] = [
    "audioUrl": "https://example.com/voice.wav",
    "voiceName": "MyVoice",
    "promptText": "" // optional
]
guard let jsonData = try? JSONSerialization.data(withJSONObject: params) else {
    print("Failed to serialize parameters")
    return
}
guard let jsonString = String(data: jsonData, encoding: .utf8) else {
    print("Failed to convert to string")
    return
}

V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "voiceClone",
    param: jsonString as NSObject,
    succ: { result in
        // The result is treated as a JSON string and decoded to read "voiceId".
        guard let resultString = result as? String,
              let resultData = resultString.data(using: .utf8),
              let resultDict = try? JSONSerialization.jsonObject(with: resultData) as? [String: Any],
              let voiceId = resultDict["voiceId"] as? String else {
            print("parse error")
            return
        }
        // Single backslash so the value is interpolated rather than printed literally.
        print("voiceId: \(voiceId)")
    },
    fail: { code, desc in
        print("error: \(code), \(desc ?? "")")
    }
)
// Sends a sample voiceClone request through callExperimentalAPI and prints the
// voice ID and request ID read from the response, plus the call's return value.
void VoiceClone() {
    json::Object json_param;
    json_param[kTIMRequestInternalOperation] = kTIMInternalOperationVoiceClone;
    json_param[kTIMRequestVoiceCloneVoiceName] = "MyVoice"; // required
    json_param[kTIMRequestVoiceCloneAudioUrl] = "https://example.com/voice.wav"; // required
    json_param[kTIMRequestVoiceClonePromptText] = ""; // optional
    json_param[kTIMRequestVoiceCloneLanguage] = ""; // optional

    std::string param_str = json::Serialize(json_param);
    int ret = callExperimentalAPI(param_str.c_str(),
        [](int32_t code, const char* desc, const char* json_params, const void* user_data) {
            if (code != TIM_SUCC) {
                // Guard desc: passing a null pointer to %s is undefined behavior.
                printf("voiceClone failed: %d, %s\n", code, desc ? desc : "");
                return;
            }
            // Same null check the other samples apply before deserializing.
            if (!json_params) {
                printf("voiceClone success: empty response\n");
                return;
            }

            json::Value json_value = json::Deserialize(json_params);
            std::string voice_id = Json2String(json_value, kTIMResponseVoiceCloneVoiceId, "");
            std::string request_id = Json2String(json_value, kTIMResponseVoiceCloneRequestId, "");
            printf("voiceClone success: voiceId=%s, requestId=%s\n", voice_id.c_str(), request_id.c_str());
        }, nullptr);

    // Report the call's own return value, as ConvertTextToVoice does.
    printf("VoiceClone|ret:%d\n", ret);
}



获取自定义音色列表(getCustomVoiceList)

获取当前用户已克隆的所有自定义音色。
请求参数: 无需额外参数。
返回参数:
参数
类型
说明
voiceList
Array
自定义音色列表,每个元素包含 voice_id(音色 ID)和 name(音色名称)。
示例代码:
Java
Swift
C++
// Fetch the custom voice list and log each entry's voice ID and name.
V2TIMManager.getInstance().callExperimentalAPI(
    "getCustomVoiceList",
    null,
    new V2TIMValueCallback<Object>() {
        @Override
        public void onSuccess(Object result) {
            // Check the type before iterating so a null or unexpected result is
            // logged instead of throwing inside the callback.
            if (!(result instanceof List)) {
                Log.e(TAG, "getCustomVoiceList returned an unexpected result");
                return;
            }
            for (Object entry : (List<?>) result) {
                if (!(entry instanceof Map)) {
                    continue;
                }
                Map<?, ?> item = (Map<?, ?>) entry;
                String voiceId = String.valueOf(item.get("voice_id"));
                String name = String.valueOf(item.get("name"));
                Log.d(TAG, "Voice ID: " + voiceId + ", Name: " + name);
            }
        }

        @Override
        public void onError(int code, String desc) {
            Log.e(TAG, "getCustomVoiceList failed: " + code + ", " + desc);
        }
    }
);
// Fetch the custom voice list; no request parameters are passed.
V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "getCustomVoiceList",
    param: nil,
    succ: { result in
        // Parse the returned voice list here; this sample only prints it.
        // Single backslash so the value is interpolated rather than printed literally.
        print("getCustomVoiceList success: \(result ?? "")")
    },
    fail: { code, desc in
        print("error: \(code), \(desc ?? "")")
    }
)
// Requests the custom voice list through callExperimentalAPI and prints each
// entry's voice ID and name, plus the call's return value.
void GetCustomVoiceList() {
    json::Object json_param;
    json_param[kTIMRequestInternalOperation] = kTIMInternalOperationGetCustomVoiceList;

    std::string json_parameters = json::Serialize(json_param);
    int ret = callExperimentalAPI(json_parameters.c_str(),
        [](int32_t code, const char* desc, const char* json_params, const void* user_data) {
            // Guard desc: passing a null pointer to %s is undefined behavior.
            printf("GetCustomVoiceList: code=%d desc=%s\n", code, desc ? desc : "");
            if (code != 0 || !json_params) {
                return;
            }

            json::Object result = json::Deserialize(json_params);
            if (!result.HasKey(kTIMResponseGetCustomVoiceListVoiceList)) {
                return;
            }

            json::Array voice_list = result[kTIMResponseGetCustomVoiceListVoiceList];
            printf(" voice_list count = %zu\n", voice_list.size());
            for (auto& item : voice_list) {
                json::Object voice_item = item;
                std::string voice_id = voice_item[kTIMCustomVoiceItemVoiceId];
                std::string name = voice_item[kTIMCustomVoiceItemName];
                printf(" voice_id = %s, name = %s\n", voice_id.c_str(), name.c_str());
            }
        }, nullptr);

    // Report the call's own return value, as ConvertTextToVoice does.
    printf("GetCustomVoiceList|ret:%d\n", ret);
}



删除自定义音色(deleteCustomVoice)

删除指定的克隆音色。
请求参数:
参数
类型
必填
说明
voiceId
String
是
要删除的音色 ID。
示例代码:
Java
Swift
C++
// Delete one custom voice: the request carries only the "voiceId" field.
JSONObject request = new JSONObject();
request.put("voiceId", "voice_xxx");

V2TIMManager imManager = V2TIMManager.getInstance();
String payload = request.toString();

// Logs the outcome of the delete call.
V2TIMValueCallback<Object> deleteCallback = new V2TIMValueCallback<Object>() {
    @Override
    public void onSuccess(Object result) {
        Log.d("TAG", "delete success");
    }

    @Override
    public void onError(int code, String desc) {
        Log.e("TAG", "error: " + code + ", " + desc);
    }
};

imManager.callExperimentalAPI("deleteCustomVoice", payload, deleteCallback);
// Delete one custom voice: the request carries only the "voiceId" field.
let params: [String: Any] = ["voiceId": "voice_xxx"]
guard let jsonData = try? JSONSerialization.data(withJSONObject: params) else {
    print("Failed to serialize parameters")
    return
}
guard let jsonString = String(data: jsonData, encoding: .utf8) else {
    print("Failed to convert to string")
    return
}

V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "deleteCustomVoice",
    param: jsonString as NSObject,
    succ: { _ in
        print("delete success")
    },
    fail: { code, desc in
        // Single backslash so the values are interpolated rather than printed literally.
        print("error: \(code), \(desc ?? "")")
    }
)
// Sends a sample deleteCustomVoice request through callExperimentalAPI and
// prints whether the call succeeded.
void CallDeleteCustomVoice() {
    json::Object json_param;
    json_param[kTIMRequestInternalOperation] = kTIMInternalOperationDeleteCustomVoice;
    json_param[kTIMRequestDeleteCustomVoiceVoiceId] = "voice_xxx"; // required

    std::string param_str = json::Serialize(json_param);
    callExperimentalAPI(param_str.c_str(),
        [](int32_t code, const char* desc, const char* json_params, const void* user_data) {
            if (code != TIM_SUCC) {
                // Guard desc: passing a null pointer to %s is undefined behavior.
                printf("deleteCustomVoice failed: %d, %s\n", code, desc ? desc : "");
                return;
            }
            printf("deleteCustomVoice success\n");
        }, nullptr);
}