dengsixing 10 月之前
父節點
當前提交
408945d40c
共有 3 個文件被更改,包括 218 次插入0 次删除
  1. 12 0
      pom.xml
  2. 82 0
      src/main/java/com/fdkankan/scene/util/AsrUtil.java
  3. 124 0
      src/main/java/com/fdkankan/scene/util/TtsUtil.java

+ 12 - 0
pom.xml

@@ -158,6 +158,18 @@
             <version>3.0.0-SNAPSHOT</version>
         </dependency>
 
+        <dependency>
+            <groupId>com.tencentcloudapi</groupId>
+            <artifactId>tencentcloud-speech-sdk-java</artifactId>
+            <version>1.0.51</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.tencentcloudapi</groupId>
+            <artifactId>tencentcloud-sdk-java</artifactId>
+            <version>3.1.1123</version>
+        </dependency>
+
     </dependencies>
 
     <dependencyManagement>

+ 82 - 0
src/main/java/com/fdkankan/scene/util/AsrUtil.java

@@ -0,0 +1,82 @@
+package com.fdkankan.scene.util;
+
+import cn.hutool.core.io.FileUtil;
+import com.google.gson.Gson;
+import com.tencent.core.utils.ByteUtils;
+import com.tencent.core.ws.SpeechClient;
+import com.tencent.tts.utils.Ttsutils;
+import com.tencent.ttsv2.*;
+import com.tencentcloudapi.asr.v20190614.AsrClient;
+import com.tencentcloudapi.asr.v20190614.models.*;
+import com.tencentcloudapi.common.Credential;
+import com.tencentcloudapi.common.exception.TencentCloudSDKException;
+import com.tencentcloudapi.common.profile.ClientProfile;
+import com.tencentcloudapi.common.profile.HttpProfile;
+import com.tencentcloudapi.common.profile.Language;
+import com.tencentcloudapi.cvm.v20170312.CvmClient;
+import com.tencentcloudapi.cvm.v20170312.models.DescribeInstancesRequest;
+import com.tencentcloudapi.cvm.v20170312.models.DescribeInstancesResponse;
+import com.tencentcloudapi.cvm.v20170312.models.Filter;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.cloud.context.config.annotation.RefreshScope;
+import org.springframework.stereotype.Component;
+
+import java.nio.ByteBuffer;
+import java.util.UUID;
+
+/**
+ * Spring component that submits speech-recognition (ASR) tasks to Tencent Cloud.
+ *
+ * <p>Credentials are injected from the {@code asr.*} configuration properties. The bean is
+ * annotated with {@code @RefreshScope}, so the injected values follow configuration refreshes.
+ */
+@Component
+@RefreshScope
+@Slf4j
+public class AsrUtil {
+
+    /** Region the ASR client is created for. */
+    private static final String REGION = "ap-shanghai";
+
+    /** Recognition engine model requested for every task. */
+    private static final String ENGINE_TYPE = "16k_zh_large";
+
+    /** Shared JSON serializer used only for logging SDK responses. */
+    private static final Gson GSON = new Gson();
+
+    // NOTE(review): appId is injected but not read anywhere in this class — confirm whether
+    // it is still needed.
+    @Value("${asr.appId}")
+    private String appId;
+
+    @Value("${asr.secretId}")
+    private String secretId;
+
+    @Value("${asr.secretKey}")
+    private String secretKey;
+
+    /**
+     * Creates an asynchronous recognition task for the audio stream at {@code url} and logs
+     * the SDK response as JSON.
+     *
+     * <p>No callback URL is set on the request, and the created task is neither closed nor
+     * queried here. A {@link TencentCloudSDKException} raised by the SDK is logged (with its
+     * stack trace) and not propagated to the caller.
+     *
+     * @param url address of the audio stream to recognise
+     */
+    public void voiceToText(String url) {
+        // Build the request parameters.
+        CreateAsyncRecognitionTaskRequest req = new CreateAsyncRecognitionTaskRequest();
+        req.setEngineType(ENGINE_TYPE);
+        // Address of the (live) stream to recognise.
+        req.setUrl(url);
+        // A callback URL can be configured via req.setCallbackUrl(...); none is set here.
+
+        Credential credential = new Credential(secretId, secretKey);
+        AsrClient client = new AsrClient(credential, REGION);
+        try {
+            // Create the asynchronous recognition task.
+            CreateAsyncRecognitionTaskResponse response = client.CreateAsyncRecognitionTask(req);
+            log.info("CreateAsyncRecognitionTask url:{}, response:{}", url, GSON.toJson(response));
+        } catch (TencentCloudSDKException e) {
+            // Use the class logger instead of printStackTrace() so the failure reaches the
+            // application's log output together with its stack trace.
+            log.error("CreateAsyncRecognitionTask failed, url:{}", url, e);
+        }
+    }
+
+}

+ 124 - 0
src/main/java/com/fdkankan/scene/util/TtsUtil.java

@@ -0,0 +1,124 @@
+package com.fdkankan.scene.util;
+
+import cn.hutool.core.io.FileUtil;
+import com.google.gson.Gson;
+import com.tencent.core.utils.ByteUtils;
+import com.tencent.core.ws.Credential;
+import com.tencent.core.ws.SpeechClient;
+import com.tencent.tts.utils.Ttsutils;
+import com.tencent.ttsv2.*;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.cloud.context.config.annotation.RefreshScope;
+import org.springframework.stereotype.Component;
+
+import java.nio.ByteBuffer;
+import java.util.UUID;
+
+@Component
+@RefreshScope
+@Slf4j
+public class TtsUtil {
+
+    //SpeechClient应用全局创建一个即可,生命周期可和整个应用保持一致
+    static SpeechClient proxy = new SpeechClient(TtsConstant.DEFAULT_TTS_REQ_URL);
+
+    @Value("${tts.appId}")
+    private String appId;
+
+    @Value("${tts.secretId}")
+    private String secretId;
+
+    @Value("${tts.secretKey}")
+    private String secretKey;
+
+    @Value("${tts.voiceType}")
+    private Integer voiceType;
+
+    public void textToVoice(String text, String voiceFilePath){
+        String codec = FileUtil.extName(voiceFilePath);
+        Credential credential = new Credential(appId, secretId, secretKey);
+        SpeechSynthesizerRequest request = new SpeechSynthesizerRequest();
+        request.setText(text);
+        //音色 ID
+        request.setVoiceType(voiceType);
+        //音量大小,范围[-10,10],对应音量大小。默认为0,代表正常音量,值越大音量越高。
+        request.setVolume(0f);
+        //语速,范围:[-2,6],分别对应不同语速:-2代表0.6倍 -1代表0.8倍 0代表1.0倍(默认) 1代表1.2倍 2代表1.5倍 6代表2.5倍
+        request.setSpeed(0f);
+        //返回音频格式,可取值:wav(默认),mp3,pcm
+        request.setCodec(codec);
+        //音频采样率:24000:24k(部分音色支持,请参见音色列表) 16000:16k(默认)8000:8k
+        request.setSampleRate(16000);
+        request.setEnableSubtitle(true);
+        //控制合成音频的情感,仅支持多情感音色使用。取值: neutral(中性)、sad(悲伤)、happy(高兴)、angry(生气)、fear(恐惧)、news(新闻)、story(故事)、radio(广播)、poetry(诗歌)、call(客服)、sajiao(撒娇)、disgusted(厌恶)、amaze(震惊)、peaceful(平静)、exciting(兴奋)、aojiao(傲娇)、jieshuo(解说)
+        request.setEmotionCategory("neutral");
+        //控制合成音频情感程度,取值范围为[50,200],默认为100;只有EmotionCategory不为空时生效;
+        request.setEmotionIntensity(100);
+        //sessionId,需要保持全局唯一(推荐使用 uuid),遇到问题需要提供该值方便服务端排查
+        request.setSessionId(UUID.randomUUID().toString());
+        //断句敏感阈值,默认值为:0,取值范围:[0,1,2]。该值越大越不容易断句,模型会更倾向于仅按照标点符号断句。此参数建议不要随意调整,可能会影响合成效果。
+        request.set("SegmentRate", 0); //sdk暂未支持参数,可通过该方法设置
+        SpeechSynthesizerListener listener = new SpeechSynthesizerListener() {//tips:回调方法中应该避免进行耗时操作,如果有耗时操作建议进行异步处理否则会影响websocket请求处理
+            byte[] audio = new byte[0];
+
+            @Override
+            public void onSynthesisStart(SpeechSynthesizerResponse response) {
+                log.info("{} session_id:{},{}", "onSynthesisStart", response.getSessionId(), new Gson().toJson(response));
+            }
+
+            @Override
+            public void onSynthesisEnd(SpeechSynthesizerResponse response) {
+                log.info("{} session_id:{},{}", "onSynthesisEnd", response.getSessionId(), new Gson().toJson(response));
+                if ("pcm".equals(request.getCodec())) {
+                    Ttsutils.responsePcm2Wav(16000, audio, request.getSessionId());
+                }
+                if ("mp3".equals(request.getCodec())) {
+                    Ttsutils.saveResponseToFile(audio, voiceFilePath);
+                }
+            }
+
+            @Override
+            public void onAudioResult(ByteBuffer buffer) {
+                byte[] data = new byte[buffer.remaining()];
+                buffer.get(data);
+                audio = ByteUtils.concat(audio, data);
+            }
+
+            @Override
+            public void onTextResult(SpeechSynthesizerResponse response) {
+                log.info("{} session_id:{},{}", "onTextResult", response.getSessionId(), new Gson().toJson(response));
+            }
+
+            @Override
+            public void onSynthesisFail(SpeechSynthesizerResponse response) {
+                log.info("{} session_id:{},{}", "onSynthesisFail", response.getSessionId(), new Gson().toJson(response));
+            }
+        };
+        //synthesizer不可重复使用,每次合成需要重新生成新对象
+        SpeechSynthesizer synthesizer = null;
+        try {
+            synthesizer = new SpeechSynthesizer(proxy, credential, request, listener);
+            long currentTimeMillis = System.currentTimeMillis();
+            synthesizer.start();
+            log.info("synthesizer start latency : " + (System.currentTimeMillis() - currentTimeMillis) + " ms");
+            currentTimeMillis = System.currentTimeMillis();
+            synthesizer.stop();
+            log.info("synthesizer stop latency : " + (System.currentTimeMillis() - currentTimeMillis) + " ms");
+
+        } catch (Exception e) {
+            log.error(e.getMessage());
+        } finally {
+            if (synthesizer != null) {
+                synthesizer.close(); //关闭连接
+            }
+        }
+
+    }
+
+
+
+
+
+
+}