功能描述
文字转语音(TTS)和音色克隆是即时通信 IM SDK 提供的 AI 能力,通过 callExperimentalAPI(Java / Swift / Objective-C / C++)实验性接口调用。主要包含以下功能:
文字转语音(TTS):将文本内容转换为语音音频文件,支持指定音色和音频格式。
音色克隆:基于一段参考音频,克隆出自定义音色,后续可在 TTS 中使用。
自定义音色列表拉取:获取当前用户已克隆的所有自定义音色。
自定义音色删除:删除不再需要的克隆音色。
说明:
文字转语音及音色克隆是增值付费功能,目前正处于内测阶段。如需试用,请通过工单申请开通。
音色克隆、自定义音色列表拉取和自定义音色删除功能在 8.9 及以上版本支持。
所有功能均通过 callExperimentalAPI 接口调用,各平台统一使用 JSON 参数传递。
文字转语音功能在 8.9 及以上版本支持。
效果展示
文字转语音 | 音色克隆 |
![]() | ![]() |
接口说明
文字转语音(convertTextToVoice)
将文本内容转换为语音音频,返回音频文件的下载 URL。
请求参数:
参数 | 类型 | 必填 | 说明 |
text | String | 是 | 需要转语音的文字内容(长度 1-255)。 |
voiceId | String | 否 | TTS 音色 ID,可使用系统预置音色或克隆的自定义音色 ID。 |
audioFormat | String | 否 | 音频格式,支持 pcm、wav。 |
language | String | 否 | 语言代码(参考 ISO 639-1),如 zh、en 等。 |
返回参数:
参数 | 类型 | 说明 |
audioUrl | String | 生成的音频文件下载地址。 |
requestId | String | 唯一请求 ID。 |
示例代码:
public void convertTextToVoice(String text, String voiceId, String audioFormat) {try {JSONObject params = new JSONObject();params.put("text", text);// 选填参数if (voiceId != null && !voiceId.isEmpty()) {params.put("voiceId", voiceId);}if (audioFormat != null && !audioFormat.isEmpty()) {params.put("audioFormat", audioFormat);}String jsonString = params.toString();V2TIMManager.getInstance().callExperimentalAPI("convertTextToVoice",jsonString,new V2TIMValueCallback<Object>() {@Overridepublic void onSuccess(Object result) {if (result instanceof HashMap) {HashMap<String, Object> map = (HashMap<String, Object>) result;String audioUrl = (String) map.get("audioUrl");String requestId = (String) map.get("requestId");Log.i("TTS", "Success - audio_url: " + audioUrl + ", request_id: " + requestId);}}@Overridepublic void onError(int code, String desc) {Log.e("TTS", "Failed - code: " + code + ", message: " + desc);}});} catch (Exception e) {Log.e("TTS", "Failed to serialize parameters: " + e.getMessage());}}
/// Converts text to speech through the experimental `convertTextToVoice` API
/// and prints the returned audio URL and request ID.
///
/// - Parameters:
///   - text: Text to synthesize (always sent).
///   - voiceId: TTS voice ID; left out of the request when `nil` or empty.
///   - audioFormat: Audio format; left out of the request when `nil` or empty.
func convertTextToVoice(text: String, voiceId: String? = nil, audioFormat: String? = nil) {
    var params: [String: Any] = ["text": text]
    // Optional parameters: only sent when the caller supplied a value.
    if let voiceId = voiceId, !voiceId.isEmpty {
        params["voiceId"] = voiceId
    }
    if let audioFormat = audioFormat, !audioFormat.isEmpty {
        params["audioFormat"] = audioFormat
    }
    guard let jsonData = try? JSONSerialization.data(withJSONObject: params),
          let jsonString = String(data: jsonData, encoding: .utf8) else {
        print("Failed to serialize parameters")
        return
    }
    V2TIMManager.sharedInstance().callExperimentalAPI(
        api: "convertTextToVoice",
        param: jsonString as NSObject,
        succ: { result in
            // The result is only read when it arrives as a dictionary.
            if let dict = result as? [String: Any] {
                let audioUrl = dict["audioUrl"] as? String ?? ""
                let requestId = dict["requestId"] as? String ?? ""
                print("Success - audio_url: \(audioUrl), request_id: \(requestId)")
            }
        },
        fail: { code, message in
            print("Failed - code: \(code), message: \(message ?? "")")
        }
    )
}
void ConvertTextToVoice() {json::Object json_param;json_param[kTIMRequestInternalOperation] = kTIMInternalOperationConvertTextToVoice;// 需要转语音的文字内容(必填,长度 1-255)json_param[kTIMRequestConvertTextToVoiceText] = "你好,这是一段测试文字转语音的内容";// TTS 声音 ID(选填)// json_param[kTIMRequestConvertTextToVoiceVoiceId] = "";// 音频格式: pcm wav(选填)json_param[kTIMRequestConvertTextToVoiceAudioFormat] = "wav";// 语言代码(选填)json_param[kTIMRequestConvertTextToVoiceLanguage] = "";std::string json_parameter = json::Serialize(json_param);int ret = callExperimentalAPI(json_parameter.c_str(),[](int32_t code, const char* desc, const char* json_params, const void* user_data) {printf("\\nConvertTextToVoice|code:%d|desc:%s\\n", code, desc);if (code == 0 && json_params) {json::Value json = json::Deserialize(json_params);if (json.HasKey(kTIMResponseConvertTextToVoiceAudioUrl)) {std::string audio_url = json[kTIMResponseConvertTextToVoiceAudioUrl];Printf("audio_url: %s\\n", audio_url.c_str());}if (json.HasKey(kTIMResponseConvertTextToVoiceRequestId)) {std::string request_id = json[kTIMResponseConvertTextToVoiceRequestId];Printf("request_id: %s\\n", request_id.c_str());}}}, nullptr);Printf("ConvertTextToVoice|ret:%d\\n", ret);}
音色克隆(voiceClone)
基于参考音频克隆出自定义音色,克隆成功后返回音色 ID,可用于后续 TTS 调用。
请求参数:
参数 | 类型 | 必填 | 说明 |
audioUrl | String | 是 | 参考音频 URL(要求 16k 单声道 wav 格式,时长 10-18 秒)。 |
voiceName | String | 是 | 声音克隆的名称。 |
promptText | String | 否 | 参考音频对应的文本内容。 |
language | String | 否 | 语言代码(参考 ISO 639-1)。 |
返回参数:
参数 | 类型 | 说明 |
voiceId | String | 克隆出的音色 ID,可用于 TTS 接口的 voiceId 参数。 |
requestId | String | 唯一请求 ID。 |
说明:
参考音频要求为 16k 采样率、单声道的 wav 格式文件,时长建议在 10-18 秒之间。
音频需要先上传获取 URL(可通过 SDK 的文件上传接口获取),再传入 audioUrl。
示例代码:
// Clone a custom voice from a reference audio URL via the experimental "voiceClone" API.
try {
    JSONObject params = new JSONObject();
    params.put("audioUrl", "https://example.com/voice.wav");
    params.put("voiceName", "MyVoice");
    params.put("promptText", ""); // Optional
    V2TIMManager.getInstance().callExperimentalAPI("voiceClone", params.toString(),
            new V2TIMValueCallback<Object>() {
                @Override
                public void onSuccess(Object result) {
                    // Guard the cast: anything other than a JSON string is reported
                    // as a parse error instead of throwing ClassCastException.
                    if (!(result instanceof String)) {
                        Log.e("TAG", "parse error");
                        return;
                    }
                    try {
                        JSONObject resultJson = new JSONObject((String) result);
                        String voiceId = resultJson.optString("voiceId");
                        Log.d("TAG", "voiceId: " + voiceId);
                    } catch (JSONException e) {
                        Log.e("TAG", "parse error");
                    }
                }

                @Override
                public void onError(int code, String desc) {
                    Log.e("TAG", "error: " + code + ", " + desc);
                }
            });
} catch (JSONException e) {
    // JSONObject.put can throw while the request parameters are being built.
    Log.e("TAG", "Failed to serialize parameters: " + e.getMessage());
}
// Clone a custom voice from a reference audio URL via the experimental "voiceClone" API.
let params: [String: Any] = [
    "audioUrl": "https://example.com/voice.wav",
    "voiceName": "MyVoice",
    "promptText": "" // Optional
]
guard let jsonData = try? JSONSerialization.data(withJSONObject: params) else {
    print("Failed to serialize parameters")
    return
}
guard let jsonString = String(data: jsonData, encoding: .utf8) else {
    print("Failed to convert to string")
    return
}
V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "voiceClone",
    param: jsonString as NSObject,
    succ: { result in
        // The result is read as a JSON string and decoded into a dictionary.
        guard let resultString = result as? String,
              let resultData = resultString.data(using: .utf8),
              let resultDict = try? JSONSerialization.jsonObject(with: resultData) as? [String: Any],
              let voiceId = resultDict["voiceId"] as? String else {
            print("parse error")
            return
        }
        print("voiceId: \(voiceId)")
    },
    fail: { code, desc in
        print("error: \(code), \(desc ?? "")")
    }
)
void VoiceClone() {json::Object json_param;json_param[kTIMRequestInternalOperation] = kTIMInternalOperationVoiceClone;json_param[kTIMRequestVoiceCloneVoiceName] = "MyVoice"; // 必填json_param[kTIMRequestVoiceCloneAudioUrl] = "https://example.com/voice.wav"; // 必填json_param[kTIMRequestVoiceClonePromptText] = ""; // 选填json_param[kTIMRequestVoiceCloneLanguage] = ""; // 选填std::string param_str = json::Serialize(json_param);int ret = callExperimentalAPI(param_str.c_str(),[](int32_t code, const char* desc, const char* json_params, const void* user_data) {if (code != TIM_SUCC) {printf("voiceClone failed: %d, %s\\n", code, desc);return;}json::Value json_value = json::Deserialize(json_params);std::string voice_id = Json2String(json_value, kTIMResponseVoiceCloneVoiceId, "");std::string request_id = Json2String(json_value, kTIMResponseVoiceCloneRequestId, "");printf("voiceClone success: voiceId=%s, requestId=%s\\n", voice_id.c_str(), request_id.c_str());}, nullptr);}
获取自定义音色列表(getCustomVoiceList)
获取当前用户已克隆的所有自定义音色。
请求参数: 无需额外参数。
返回参数:
参数 | 类型 | 说明 |
voiceList | Array | 自定义音色列表,每个元素包含 voice_id(音色 ID)和 name(音色名称)。 |
示例代码:
// Fetch the custom voices through the experimental "getCustomVoiceList" API; no parameters are passed.
V2TIMManager.getInstance().callExperimentalAPI(
        "getCustomVoiceList",
        null,
        new V2TIMValueCallback<Object>() {
            @Override
            public void onSuccess(Object result) {
                // Guard the cast: a null or non-List result would otherwise crash
                // with NullPointerException / ClassCastException.
                if (!(result instanceof List)) {
                    Log.e(TAG, "getCustomVoiceList: unexpected result");
                    return;
                }
                List<Map<String, String>> voiceList = (List<Map<String, String>>) result;
                for (Map<String, String> item : voiceList) {
                    String voiceId = item.get("voice_id");
                    String name = item.get("name");
                    Log.d(TAG, "Voice ID: " + voiceId + ", Name: " + name);
                }
            }

            @Override
            public void onError(int code, String desc) {
                Log.e(TAG, "getCustomVoiceList failed: " + code + ", " + desc);
            }
        });
// Fetch the custom voices through the experimental "getCustomVoiceList" API; no parameters are passed.
V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "getCustomVoiceList",
    param: nil,
    succ: { result in
        // The returned voice list is printed as-is; parse it here as needed.
        print("getCustomVoiceList success: \(result ?? "")")
    },
    fail: { code, desc in
        print("error: \(code), \(desc ?? "")")
    }
)
void GetCustomVoiceList() {json::Object json_param;json_param[kTIMRequestInternalOperation] = kTIMInternalOperationGetCustomVoiceList;std::string json_parameters = json::Serialize(json_param);int ret = callExperimentalAPI(json_parameters.c_str(),[](int32_t code, const char* desc, const char* json_params, const void* user_data) {printf("GetCustomVoiceList: code=%d desc=%s\\n", code, desc);if (code == 0 && json_params) {json::Object result = json::Deserialize(json_params);if (result.HasKey(kTIMResponseGetCustomVoiceListVoiceList)) {json::Array voice_list = result[kTIMResponseGetCustomVoiceListVoiceList];printf(" voice_list count = %zu\\n", voice_list.size());for (auto& item : voice_list) {json::Object voice_item = item;std::string voice_id = voice_item[kTIMCustomVoiceItemVoiceId];std::string name = voice_item[kTIMCustomVoiceItemName];Printf(" voice_id = %s, name = %s\\n", voice_id.c_str(), name.c_str());}}}}, nullptr);}
删除自定义音色(deleteCustomVoice)
删除指定的克隆音色。
请求参数:
参数 | 类型 | 必填 | 说明 |
voiceId | String | 是 | 要删除的音色 ID。 |
示例代码:
// Delete a cloned voice by ID via the experimental "deleteCustomVoice" API.
try {
    JSONObject params = new JSONObject();
    params.put("voiceId", "voice_xxx");
    V2TIMManager.getInstance().callExperimentalAPI("deleteCustomVoice", params.toString(),
            new V2TIMValueCallback<Object>() {
                @Override
                public void onSuccess(Object result) {
                    Log.d("TAG", "delete success");
                }

                @Override
                public void onError(int code, String desc) {
                    Log.e("TAG", "error: " + code + ", " + desc);
                }
            });
} catch (JSONException e) {
    // JSONObject.put can throw while the request parameters are being built.
    Log.e("TAG", "Failed to serialize parameters: " + e.getMessage());
}
// Delete a cloned voice by ID via the experimental "deleteCustomVoice" API.
let params: [String: Any] = ["voiceId": "voice_xxx"]
guard let jsonData = try? JSONSerialization.data(withJSONObject: params) else {
    print("Failed to serialize parameters")
    return
}
guard let jsonString = String(data: jsonData, encoding: .utf8) else {
    print("Failed to convert to string")
    return
}
V2TIMManager.sharedInstance().callExperimentalAPI(
    api: "deleteCustomVoice",
    param: jsonString as NSObject,
    succ: { _ in
        print("delete success")
    },
    fail: { code, desc in
        print("error: \(code), \(desc ?? "")")
    }
)
void CallDeleteCustomVoice() {json::Object json_param;json_param[kTIMRequestInternalOperation] = kTIMInternalOperationDeleteCustomVoice;json_param[kTIMRequestDeleteCustomVoiceVoiceId] = "voice_xxx"; // 必填std::string param_str = json::Serialize(json_param);callExperimentalAPI(param_str.c_str(), [](int32_t code, const char* desc, const char* json_params, const void* user_data) {if (code != TIM_SUCC) {printf("deleteCustomVoice failed: %d, %s\\n", code, desc);return;}printf("deleteCustomVoice success\\n");}, nullptr);}

