Unity对接科大讯飞实时语音转写WebAPI(Windows平台)

embedded/2024/10/19 0:12:49/

科大讯飞官方文档:实时语音转写 API 文档 | 讯飞开放平台文档中心 (xfyun.cn)

参考文章:unity通过WebAPI连接Websocket实现讯飞语音识别与合成。_unity websocket audio-CSDN博客

        要实现语音转文字。首先我们需要从麦克风获取到语音数据,这里用到了Microphone类,Unity自带;其次,需要将语音数据发送给讯飞,这里用到的是WebSocketSharp.WebSocket,用习惯了。然后就是按照文档一步步踩坑了。

        直接贴代码了。代码主要实现握手阶段参数签名,实时通信阶段的数据传输以及结果解析。

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using WebSocketSharp;
using System.Text;
using System.Security.Cryptography;
using LitJson;
using Newtonsoft.Json;

/// <summary>
/// Streams microphone audio to the iFlytek real-time speech transcription (RTASR)
/// WebSocket API and reports recognized text back on the Unity main thread.
/// </summary>
/// <remarks>
/// Flow: <see cref="StartSpeech"/> starts a 16 kHz microphone recording and opens the
/// WebSocket; the <c>SendVoiceData</c> coroutine uploads 16-bit PCM frames at a
/// real-time rate; server messages are queued on the socket thread and parsed in
/// <c>Update</c>, so callbacks always run on the main thread.
/// </remarks>
public class SpeechHelper : MonoBehaviour
{
    /// <summary>
    /// Raised on the main thread with the text of every final recognition result
    /// (in addition to the callback given to <see cref="InitSpeechHelper"/>).
    /// </summary>
    public event Action<string> 语音识别完成事件;

    /// <summary>The clip the microphone is recording into for the current session.</summary>
    public AudioClip RecordedClip;

    // Credentials are serialized fields so they can be set in the Inspector instead
    // of being edited in source; the defaults are the original placeholders.
    [SerializeField] private string appId = "appid";
    [SerializeField] private string apiKey = "appkey";

    private const int SampleRate = 16000;            // Hz, passed to Microphone.Start
    private const int MaxRecordSeconds = 60;         // length of the recording clip
    private const float SendIntervalSeconds = 0.04f; // one audio frame every 40 ms
    private const int SamplesPerFrame = 640;         // 640 samples * 2 bytes = 1280 bytes per frame
    private const int MaxFramesPerTick = 4;          // bounds the catch-up burst after a slow frame
    private const float ConnectTimeoutSeconds = 10f; // give up waiting for the socket to open

    private string micphoneName = string.Empty;
    WebSocket speechWebSocket;
    private System.Action<string> resultCallback;
    private Coroutine sendCoroutine;

    // Filled on the WebSocket thread, drained in Update(); every access takes the lock.
    private readonly Queue<string> messageQueue = new Queue<string>();
    private readonly object messageQueueLock = new object();

    /// <summary>Registers the callback that receives the text of each final result.</summary>
    /// <param name="textCallback">Invoked on the main thread; may be null.</param>
    public void InitSpeechHelper(System.Action<string> textCallback)
    {
        resultCallback = textCallback;
    }

    /// <summary>
    /// Starts recording from the first microphone and connects to the service.
    /// Does nothing (with a warning) when a previous session is still open or no
    /// microphone is available.
    /// </summary>
    public void StartSpeech()
    {
        if (speechWebSocket != null && speechWebSocket.ReadyState == WebSocketState.Open)
        {
            Debug.LogWarning("开始语音识别失败!,等待上次识别连接结束");
            return;
        }

        if (Microphone.devices.Length <= 0)
        {
            Debug.LogWarning("找不到麦克风");
            return;
        }

        lock (messageQueueLock)
        {
            messageQueue.Clear();
        }

        micphoneName = Microphone.devices[0];
        Debug.Log("micphoneName:" + micphoneName);
        try
        {
            RecordedClip = Microphone.Start(micphoneName, false, MaxRecordSeconds, SampleRate);
            ConnectSpeechWebSocket();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>
    /// Stops the microphone. The send coroutine then uploads whatever it had
    /// already observed as recorded and finishes the session with the end marker.
    /// </summary>
    public void StopSpeech()
    {
        Microphone.End(micphoneName);
        Debug.Log("识别结束,停止录音");
    }

    /// <summary>Creates the WebSocket, wires its events, connects and starts the upload coroutine.</summary>
    void ConnectSpeechWebSocket()
    {
        // Drop any leftover socket/coroutine so two sessions never run concurrently.
        ReleaseWebSocket();

        WebSocket socket;
        try
        {
            socket = new WebSocket(GetWebSocketUrl());
        }
        catch (Exception ex)
        {
            UnityEngine.Debug.LogError(ex.Message);
            // Without a socket nobody would ever stop the recording started by StartSpeech.
            Microphone.End(micphoneName);
            return;
        }

        socket.OnOpen += (sender, e) => { Debug.Log("OnOpen"); };
        socket.OnClose += OnWebSocketClose;
        socket.OnMessage += OnInitMessage;
        socket.OnError += OnError;

        speechWebSocket = socket;
        socket.ConnectAsync();
        sendCoroutine = StartCoroutine(SendVoiceData());
    }

    /// <summary>Stops the upload coroutine and detaches and closes the current socket, if any.</summary>
    private void ReleaseWebSocket()
    {
        if (sendCoroutine != null)
        {
            StopCoroutine(sendCoroutine);
            sendCoroutine = null;
        }

        if (speechWebSocket == null)
        {
            return;
        }

        speechWebSocket.OnClose -= OnWebSocketClose;
        speechWebSocket.OnMessage -= OnInitMessage;
        speechWebSocket.OnError -= OnError;
        try
        {
            speechWebSocket.CloseAsync();
        }
        catch (Exception ex)
        {
            Debug.LogWarning("WebSocket关闭失败:" + ex.Message);
        }

        speechWebSocket = null;
    }

    void OnWebSocketClose(object sender, CloseEventArgs e)
    {
        Debug.Log("OnWebSocketClose");
    }

    /// <summary>
    /// Socket-thread handler: only queues the payload. Parsing and callbacks happen
    /// in <c>Update</c> on the main thread.
    /// </summary>
    void OnInitMessage(object sender, MessageEventArgs e)
    {
        UnityEngine.Debug.Log("qqqqqqqqqqqqqWebSocket数据返回:" + e.Data);
        lock (messageQueueLock)
        {
            messageQueue.Enqueue(e.Data);
        }
    }

    /// <summary>Parses one server message and dispatches final results to the listeners.</summary>
    private void MainThreadOnMessage(string message)
    {
        try
        {
            XFResponse response = JsonConvert.DeserializeObject<XFResponse>(message);
            if (0 != response.code)
            {
                // Surface service-side failures instead of dropping them silently.
                Debug.LogError("讯飞返回错误,code:" + response.code + ",desc:" + response.desc);
                return;
            }

            // string.Equals tolerates a missing "action" field (null).
            if (!string.Equals(response.action, "result", StringComparison.Ordinal))
            {
                return;
            }

            var result = ParseXunfeiRecognitionResult(response.data);
            if (result.IsFinal)
            {
                Debug.Log("Text最终:" + result.Text);
                resultCallback?.Invoke(result.Text);
                语音识别完成事件?.Invoke(result.Text);
            }
            else
            {
                Debug.Log("Text中间:" + result.Text);
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    void OnError(object sender, ErrorEventArgs e)
    {
        UnityEngine.Debug.Log("WebSoclet:发生错误:" + e.Message);
    }

    /// <summary>
    /// Extracts the recognized text from the <c>data</c> payload of a "result" message
    /// by concatenating every <c>cn.st.rt[].ws[].cw[].w</c> value.
    /// </summary>
    /// <param name="dataJson">JSON text of the payload.</param>
    /// <returns>
    /// The concatenated text; <c>IsFinal</c> is false when <c>cn.st.ed</c> is "0".
    /// On a parse error the error is logged and whatever was collected so far is returned.
    /// </returns>
    public SpeechRecognitionResult ParseXunfeiRecognitionResult(string dataJson)
    {
        StringBuilder builder = new StringBuilder();
        SpeechRecognitionResult res = new SpeechRecognitionResult();
        try
        {
            JsonData data = JsonMapper.ToObject(dataJson);
            JsonData st = data["cn"]["st"];

            // An "ed" of "0" is treated as an intermediate (non-final) result.
            res.IsFinal = !st["ed"].ToString().Equals("0");

            foreach (JsonData rtObject in st["rt"])
            {
                foreach (JsonData wsObject in rtObject["ws"])
                {
                    foreach (JsonData cwObject in wsObject["cw"])
                    {
                        builder.Append(cwObject["w"].ToString());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }

        res.Text = builder.ToString();
        return res;
    }

    /// <summary>Sends one frame of PCM audio as a binary message if the socket is open.</summary>
    void SendData(byte[] voiceData)
    {
        if (voiceData == null || voiceData.Length == 0)
        {
            return;
        }

        // Null check first. ReadyState is used instead of IsAlive because IsAlive
        // performs a synchronous ping/pong, which would stall the main thread per frame.
        if (speechWebSocket == null || speechWebSocket.ReadyState != WebSocketState.Open)
        {
            return;
        }

        Debug.Log("SendData:" + voiceData.Length + ",time:" + Time.realtimeSinceStartup);
        try
        {
            speechWebSocket.SendAsync(voiceData, success =>
            {
                if (success)
                {
                    UnityEngine.Debug.Log("WebSoclet:发送成功:" + voiceData.Length);
                }
                else
                {
                    UnityEngine.Debug.Log("WebSoclet:发送失败:");
                }
            });
        }
        catch (Exception ex)
        {
            Debug.LogWarning("WebSocket发送异常:" + ex.Message);
        }
    }

    /// <summary>
    /// Sends the end-of-audio marker as a binary message.
    /// </summary>
    /// <param name="callback">
    /// Invoked once the send attempt completes; not invoked when the socket is not open.
    /// </param>
    void SendEndMsg(System.Action callback)
    {
        string endMsg = "{\"end\": true}";
        byte[] data = Encoding.UTF8.GetBytes(endMsg);

        if (speechWebSocket == null || speechWebSocket.ReadyState != WebSocketState.Open)
        {
            return;
        }

        try
        {
            speechWebSocket.SendAsync(data, success =>
            {
                if (success)
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END成功:" + data.Length);
                }
                else
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END失败:");
                }

                callback?.Invoke();
            });
        }
        catch (Exception ex)
        {
            Debug.LogWarning("WebSocket发送异常:" + ex.Message);
        }
    }

    /// <summary>
    /// Coroutine that uploads the recording while it is being made and then sends
    /// the end marker.
    /// </summary>
    /// <remarks>
    /// Pacing is based on elapsed real time rather than on one frame per coroutine
    /// wake-up: coroutine waits are only evaluated once per rendered frame, so a
    /// 40 ms wait can take much longer and the upload would fall behind the
    /// recording. Each tick sends as many frames as are due (capped by
    /// <c>MaxFramesPerTick</c>), keeping the average rate at one frame per 40 ms.
    /// NOTE(review): short bursts of a few frames are assumed to be acceptable to
    /// the service; confirm against the service's rate guidance.
    /// </remarks>
    IEnumerator SendVoiceData()
    {
        // Wait for the connection, but never forever: a failed connect would
        // otherwise leave this coroutine and the microphone running.
        float connectDeadline = Time.realtimeSinceStartup + ConnectTimeoutSeconds;
        while (speechWebSocket.ReadyState != WebSocketState.Open)
        {
            if (speechWebSocket.ReadyState == WebSocketState.Closed
                || Time.realtimeSinceStartup > connectDeadline)
            {
                Debug.LogWarning("WebSocket连接失败或超时,停止录音");
                Microphone.End(micphoneName);
                sendCoroutine = null;
                yield break;
            }

            yield return null;
        }

        // Wait for the first samples; stop waiting if the recording was ended meanwhile.
        while (Microphone.IsRecording(micphoneName) && Microphone.GetPosition(micphoneName) <= 0)
        {
            yield return null;
        }

        int position = Microphone.GetPosition(micphoneName);
        int lastPosition = 0;
        float nextSendTime = Time.realtimeSinceStartup + SendIntervalSeconds;

        while (position < RecordedClip.samples && speechWebSocket.ReadyState == WebSocketState.Open)
        {
            yield return null;

            bool recording = Microphone.IsRecording(micphoneName);
            if (recording)
            {
                // Keep the position monotonic: GetPosition can report 0 once the
                // device stops, which must not discard samples already observed.
                int current = Microphone.GetPosition(micphoneName);
                if (current > position)
                {
                    position = current;
                }
            }

            if (position <= lastPosition)
            {
                if (recording)
                {
                    // Caught up with the recorder; more samples will arrive.
                    continue;
                }

                Debug.LogWarning("字节流发送完毕!强制结束!");
                break;
            }

            float now = Time.realtimeSinceStartup;
            int framesSent = 0;
            while (now >= nextSendTime && lastPosition < position && framesSent < MaxFramesPerTick)
            {
                int length = Mathf.Min(position - lastPosition, SamplesPerFrame);
                SendData(GetClipData(lastPosition, length, RecordedClip));
                lastPosition += length;
                nextSendTime += SendIntervalSeconds;
                framesSent++;
            }

            // Do not accumulate send credit while idle or after hitting the burst cap.
            if (nextSendTime < now)
            {
                nextSendTime = now;
            }
        }

        // The end marker must also be spaced 40 ms after the last audio frame.
        yield return new WaitForSecondsRealtime(SendIntervalSeconds);
        SendEndMsg(null);
        Microphone.End(micphoneName);
        sendCoroutine = null;
    }

    /// <summary>
    /// Converts a range of samples of a clip to 16-bit little-endian PCM.
    /// </summary>
    /// <param name="star">Index of the first sample to read.</param>
    /// <param name="length">Number of samples to read.</param>
    /// <param name="recordedClip">Clip to read from.</param>
    /// <returns>Two bytes per sample; an empty array when <paramref name="length"/> is not positive.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="recordedClip"/> is null.</exception>
    public byte[] GetClipData(int star, int length, AudioClip recordedClip)
    {
        if (recordedClip == null)
        {
            throw new ArgumentNullException(nameof(recordedClip));
        }

        if (length <= 0)
        {
            return new byte[0];
        }

        float[] samples = new float[length];
        recordedClip.GetData(samples, star);

        const int rescaleFactor = 32767;
        byte[] outData = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so an out-of-range sample cannot wrap around in the short cast.
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);

            // Explicit little-endian byte order, independent of the host CPU.
            outData[i * 2] = (byte)(pcm & 0xFF);
            outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        return outData;
    }

    /// <summary>
    /// Builds the handshake URL: <c>signa = Base64(HmacSHA1(MD5(appid + ts), apiKey))</c>.
    /// </summary>
    private string GetWebSocketUrl()
    {
        string ts = GetCurrentUnixTimestampMillis().ToString();
        string baseString = appId + ts;
        string md5 = GetMD5Hash(baseString);
        UnityEngine.Debug.Log("baseString:" + baseString + ",md5:" + md5);

        // Base64 can contain '+', '/' and '=', which must be escaped inside a query string.
        string signa = Uri.EscapeDataString(CalculateHmacSha1(md5, apiKey));

        string url = string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}", appId, ts, signa);
        UnityEngine.Debug.Log(url);
        return url;
    }

    /// <summary>
    /// Returns the current Unix time in SECONDS (the name is historical; the
    /// handshake uses seconds, not milliseconds).
    /// </summary>
    private long GetCurrentUnixTimestampMillis()
    {
        // Computed in UTC so daylight-saving offsets of the local zone cannot skew it.
        return DateTimeOffset.UtcNow.ToUnixTimeSeconds();
    }

    /// <summary>Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of <paramref name="input"/>.</summary>
    public string GetMD5Hash(string input)
    {
        using (MD5 md5Hasher = MD5.Create())
        {
            // UTF-8 instead of Encoding.Default, whose result depends on the machine's code page.
            byte[] data = md5Hasher.ComputeHash(Encoding.UTF8.GetBytes(input));
            StringBuilder sBuilder = new StringBuilder(data.Length * 2);
            for (int i = 0; i < data.Length; i++)
            {
                sBuilder.Append(data[i].ToString("x2"));
            }

            return sBuilder.ToString();
        }
    }

    /// <summary>Returns the Base64-encoded HMAC-SHA1 of <paramref name="data"/> keyed with <paramref name="key"/> (both UTF-8).</summary>
    public string CalculateHmacSha1(string data, string key)
    {
        using (HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(key)))
        {
            byte[] hashBytes = hmac.ComputeHash(Encoding.UTF8.GetBytes(data));
            return Convert.ToBase64String(hashBytes);
        }
    }

    /// <summary>Drains the messages queued by the socket thread and handles them on the main thread.</summary>
    private void Update()
    {
        while (true)
        {
            string message;
            lock (messageQueueLock)
            {
                if (messageQueue.Count == 0)
                {
                    break;
                }

                message = messageQueue.Dequeue();
            }

            // Handled outside the lock so callbacks never block the socket thread.
            MainThreadOnMessage(message);
        }
    }

    /// <summary>Stops the recording this component started and closes the socket when the component is destroyed.</summary>
    private void OnDestroy()
    {
        if (!string.IsNullOrEmpty(micphoneName) && Microphone.IsRecording(micphoneName))
        {
            Microphone.End(micphoneName);
        }

        ReleaseWebSocket();
    }
}

Json解析类。

/// <summary>
/// Envelope of a JSON message received from the speech service; each field is
/// populated from the JSON property of the same name.
/// </summary>
[Serializable]
public struct XFResponse
{
    /// <summary>Message kind; the value "result" marks a message whose <see cref="data"/> holds recognition output.</summary>
    public string action;

    /// <summary>Status code; 0 is treated as success by the consumer of this struct.</summary>
    public int code;

    /// <summary>Payload, itself JSON text, that is parsed separately.</summary>
    public string data;

    /// <summary>Presumably a human-readable status description — confirm against the service documentation.</summary>
    public string desc;

    /// <summary>Presumably the session identifier — confirm against the service documentation.</summary>
    public string sid;
}
/// <summary>
/// Outcome of parsing one recognition payload: the text plus a flag telling
/// whether that text is final.
/// </summary>
[Serializable]
public struct SpeechRecognitionResult
{
    /// <summary>Recognized text assembled from the payload.</summary>
    public string Text;

    /// <summary>True for a final result, false for an intermediate one.</summary>
    public bool IsFinal;
}

值得注意的问题。

1、Microphone使用时传默认设备名比传null好使

2、握手阶段时间戳用的是秒(不是毫秒)

3、上传结束标志时,也要间隔40ms,否则讯飞像是没收到一样

遗留问题:

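yield return new WaitForSecondsRealtime(0.04f) 的实际间隔在 0.1s 左右,导致消息发送得很慢。可能的原因有两个:一是协程的等待条件每帧只检查一次,实际间隔会按帧时间向上取整;二是 WebSocketSharp 的 IsAlive 属性是通过一次同步的 ping/pong 实现的,若每次发送前都访问它,就会让主线程多阻塞一个网络往返的时间。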


http://www.ppmy.cn/embedded/35393.html

相关文章

搜索算法系列之四(斐波那契)

以下算法被验证过，如有什么问题或有补充的欢迎留言。 前言 斐波那契数列，又称黄金分割数列，是由意大利数学家（Leonardo Fibonacci）在1202年提出的。这个数列的递推关系是F(0)=1，F(1)=1，F(n)=F(n-…

Celery(分布式任务队列)入门学习笔记

Celery 的简单介绍 用 Celery 官方的介绍：它是一个分布式任务队列; 简单，灵活，可靠的处理大量消息的分布式系统; 它专注于实时处理，并支持任务调度。 Celery 如果使用 RabbitMQ 作为消息系统的话，整个应用体系就是下…

代码随想录算法训练营第三十八天|动态规划理论基础,509. 斐波那契数,70. 爬楼梯,746. 使用最小花费爬楼梯

目录 动态规划理论基础509. 斐波那契数思路代码 70. 爬楼梯思路代码 746. 使用最小花费爬楼梯思路代码 动态规划理论基础 文档讲解：代码随想录 视频讲解：从此再也不怕动态规划了，动态规划解题方法论大曝光 ！| 理论基础 |力扣刷题总…

深度学习之DCGAN

目录 须知 转置卷积 DCGAN 什么是DCGAN 生成器代码 判别器代码 补充知识 LeakyReLU（x） torch.nn.Dropout torch.nn.Dropout2d DCGAN完整代码 运行结果 图形显示 须知 在讲解DCGAN之前我们首先要了解转置卷积和GAN 关于GAN在这片博客中已经很…

Python查询PostgreSQL数据库

哈喽，大家好，我是木头左！ Python与PostgreSQL的连接 需要了解如何在Python中连接到PostgreSQL数据库。这通常涉及到使用一个库，如psycopg2，它是Python中用于PostgreSQL的最流行的适配器。安装psycopg2非常简单…

SpringBoot使用git-commit-id-maven-plugin打包

简介 git-commit-id-maven-plugin 是一个maven 插件，用来在打包的时候将git-commit 信息打进jar中。 这样做的好处是可以将发布的某版本和对应的代码关联起来，方便查阅和线上项目的维护。至于它的作用，用官方说法，这个功能对于大…

网页html版面分析-- BeautifulSoup(python 文档解析提取)

介绍 BeautifulSoup 是一个可以从HTML或XML 文件中提取数据的python库；它能通过转换器实现惯用的文档导航、查找、修改文档的方式。 BeautifulSoup是一个基于re开发的解析库，可以提供一些强大的解析功能；使用BeautifulSoup 能够提高提取数据的效…

创享大会分会场—“需求掉马”《业技融合的问题冲刺》

“不能对业务产生影响的研发效能提升是浪费更是罪恶；行动学习的结果大于产出；学至于行之而止矣，行之，明也；” 会议主题：创享大会分会场—“需求掉马”《业技融合的问题冲刺》 会议时间：5月25日…