|
@@ -0,0 +1,124 @@
|
|
|
+package com.fdkankan.scene.util;
|
|
|
+
|
|
|
+import cn.hutool.core.io.FileUtil;
|
|
|
+import com.google.gson.Gson;
|
|
|
+import com.tencent.core.utils.ByteUtils;
|
|
|
+import com.tencent.core.ws.Credential;
|
|
|
+import com.tencent.core.ws.SpeechClient;
|
|
|
+import com.tencent.tts.utils.Ttsutils;
|
|
|
+import com.tencent.ttsv2.*;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
+import org.springframework.cloud.context.config.annotation.RefreshScope;
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
+
|
|
|
+import java.nio.ByteBuffer;
|
|
|
+import java.util.UUID;
|
|
|
+
|
|
|
+@Component
|
|
|
+@RefreshScope
|
|
|
+@Slf4j
|
|
|
+public class TtsUtil {
|
|
|
+
|
|
|
+ //SpeechClient应用全局创建一个即可,生命周期可和整个应用保持一致
|
|
|
+ static SpeechClient proxy = new SpeechClient(TtsConstant.DEFAULT_TTS_REQ_URL);
|
|
|
+
|
|
|
+ @Value("${tts.appId}")
|
|
|
+ private String appId;
|
|
|
+
|
|
|
+ @Value("${tts.secretId}")
|
|
|
+ private String secretId;
|
|
|
+
|
|
|
+ @Value("${tts.secretKey}")
|
|
|
+ private String secretKey;
|
|
|
+
|
|
|
+ @Value("${tts.voiceType}")
|
|
|
+ private Integer voiceType;
|
|
|
+
|
|
|
+ public void textToVoice(String text, String voiceFilePath){
|
|
|
+ String codec = FileUtil.extName(voiceFilePath);
|
|
|
+ Credential credential = new Credential(appId, secretId, secretKey);
|
|
|
+ SpeechSynthesizerRequest request = new SpeechSynthesizerRequest();
|
|
|
+ request.setText(text);
|
|
|
+ //音色 ID
|
|
|
+ request.setVoiceType(voiceType);
|
|
|
+ //音量大小,范围[-10,10],对应音量大小。默认为0,代表正常音量,值越大音量越高。
|
|
|
+ request.setVolume(0f);
|
|
|
+ //语速,范围:[-2,6],分别对应不同语速:-2代表0.6倍 -1代表0.8倍 0代表1.0倍(默认) 1代表1.2倍 2代表1.5倍 6代表2.5倍
|
|
|
+ request.setSpeed(0f);
|
|
|
+ //返回音频格式,可取值:wav(默认),mp3,pcm
|
|
|
+ request.setCodec(codec);
|
|
|
+ //音频采样率:24000:24k(部分音色支持,请参见音色列表) 16000:16k(默认)8000:8k
|
|
|
+ request.setSampleRate(16000);
|
|
|
+ request.setEnableSubtitle(true);
|
|
|
+ //控制合成音频的情感,仅支持多情感音色使用。取值: neutral(中性)、sad(悲伤)、happy(高兴)、angry(生气)、fear(恐惧)、news(新闻)、story(故事)、radio(广播)、poetry(诗歌)、call(客服)、sajiao(撒娇)、disgusted(厌恶)、amaze(震惊)、peaceful(平静)、exciting(兴奋)、aojiao(傲娇)、jieshuo(解说)
|
|
|
+ request.setEmotionCategory("neutral");
|
|
|
+ //控制合成音频情感程度,取值范围为[50,200],默认为100;只有EmotionCategory不为空时生效;
|
|
|
+ request.setEmotionIntensity(100);
|
|
|
+ //sessionId,需要保持全局唯一(推荐使用 uuid),遇到问题需要提供该值方便服务端排查
|
|
|
+ request.setSessionId(UUID.randomUUID().toString());
|
|
|
+ //断句敏感阈值,默认值为:0,取值范围:[0,1,2]。该值越大越不容易断句,模型会更倾向于仅按照标点符号断句。此参数建议不要随意调整,可能会影响合成效果。
|
|
|
+ request.set("SegmentRate", 0); //sdk暂未支持参数,可通过该方法设置
|
|
|
+ SpeechSynthesizerListener listener = new SpeechSynthesizerListener() {//tips:回调方法中应该避免进行耗时操作,如果有耗时操作建议进行异步处理否则会影响websocket请求处理
|
|
|
+ byte[] audio = new byte[0];
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onSynthesisStart(SpeechSynthesizerResponse response) {
|
|
|
+ log.info("{} session_id:{},{}", "onSynthesisStart", response.getSessionId(), new Gson().toJson(response));
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onSynthesisEnd(SpeechSynthesizerResponse response) {
|
|
|
+ log.info("{} session_id:{},{}", "onSynthesisEnd", response.getSessionId(), new Gson().toJson(response));
|
|
|
+ if ("pcm".equals(request.getCodec())) {
|
|
|
+ Ttsutils.responsePcm2Wav(16000, audio, request.getSessionId());
|
|
|
+ }
|
|
|
+ if ("mp3".equals(request.getCodec())) {
|
|
|
+ Ttsutils.saveResponseToFile(audio, voiceFilePath);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onAudioResult(ByteBuffer buffer) {
|
|
|
+ byte[] data = new byte[buffer.remaining()];
|
|
|
+ buffer.get(data);
|
|
|
+ audio = ByteUtils.concat(audio, data);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onTextResult(SpeechSynthesizerResponse response) {
|
|
|
+ log.info("{} session_id:{},{}", "onTextResult", response.getSessionId(), new Gson().toJson(response));
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public void onSynthesisFail(SpeechSynthesizerResponse response) {
|
|
|
+ log.info("{} session_id:{},{}", "onSynthesisFail", response.getSessionId(), new Gson().toJson(response));
|
|
|
+ }
|
|
|
+ };
|
|
|
+ //synthesizer不可重复使用,每次合成需要重新生成新对象
|
|
|
+ SpeechSynthesizer synthesizer = null;
|
|
|
+ try {
|
|
|
+ synthesizer = new SpeechSynthesizer(proxy, credential, request, listener);
|
|
|
+ long currentTimeMillis = System.currentTimeMillis();
|
|
|
+ synthesizer.start();
|
|
|
+ log.info("synthesizer start latency : " + (System.currentTimeMillis() - currentTimeMillis) + " ms");
|
|
|
+ currentTimeMillis = System.currentTimeMillis();
|
|
|
+ synthesizer.stop();
|
|
|
+ log.info("synthesizer stop latency : " + (System.currentTimeMillis() - currentTimeMillis) + " ms");
|
|
|
+
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(e.getMessage());
|
|
|
+ } finally {
|
|
|
+ if (synthesizer != null) {
|
|
|
+ synthesizer.close(); //关闭连接
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+}
|