Unity Tools: A Brief Guide to Azure (Microsoft) Continuous Speech Recognition (ASR)
Contents
Unity Tools: A Brief Guide to Azure (Microsoft) Continuous Speech Recognition (ASR)
1. Brief Introduction
2. How It Works
3. Notes and Caveats
4. Implementation Steps
5. Key Scripts
1. Brief Introduction
This is one of a set of Unity utility modules I have put together for game development; each module works independently so it can be dropped into a project as needed.
This section is a brief walkthrough of continuous speech recognition (ASR) with Microsoft Azure. It is only a quick summary; if you have a better approach, feel free to leave a comment.
Sign up (official site):
Azure for Students - Free Account Credit | Microsoft Azure
Official technical documentation:
Technical documentation | Microsoft Learn
Official speech-to-text quickstart:
Speech to text quickstart - Speech service - Azure AI services | Microsoft Learn
Azure Unity SDK package (official page):
Install the Speech SDK - Azure Cognitive Services | Microsoft Learn
Direct SDK package link:
https://aka.ms/csspeech/unitypackage
2. How It Works
1. Register on the Azure portal to obtain the SPEECH_KEY and SPEECH_REGION for the Speech service.
2. Speech recognition uses the microphone, so on mobile platforms the microphone permission must be requested at runtime.
3. Start recognition and subscribe to the recognizer's events to receive the results, as sketched below.
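In SDK terms, those three steps boil down to only a few calls. Below is a minimal sketch, not the full handler from section 5; "YourKey" and "YourRegion" are placeholders for the values from step 1:
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using UnityEngine;

// Minimal continuous-recognition sketch; "YourKey"/"YourRegion" are placeholders.
public class MinimalContinuousAsr : MonoBehaviour
{
    SpeechRecognizer m_Recognizer;

    async void Start()
    {
        // Step 1: configure the service with your key, region and language.
        var speechConfig = SpeechConfig.FromSubscription("YourKey", "YourRegion");
        speechConfig.SpeechRecognitionLanguage = "zh-CN";

        // Step 2: default microphone input (permission must already be granted on mobile).
        var audioConfig = AudioConfig.FromDefaultMicrophoneInput();

        // Step 3: subscribe to events, then start continuous recognition.
        m_Recognizer = new SpeechRecognizer(speechConfig, audioConfig);
        m_Recognizer.Recognized += (s, e) =>
        {
            if (e.Result.Reason == ResultReason.RecognizedSpeech)
                Debug.Log("Recognized: " + e.Result.Text);
        };
        await m_Recognizer.StartContinuousRecognitionAsync();
    }

    async void OnDestroy()
    {
        if (m_Recognizer != null)
            await m_Recognizer.StopContinuousRecognitionAsync();
    }
}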
3. Notes and Caveats
1. If you run into stuttering, check how work is handed between the main thread and background threads; the SDK callbacks fire on background threads, and queuing Unity API calls back onto the main thread (see the Loom sketch at the end of section 5) usually clears it up.
2. On desktop (e.g. Windows) the app can run without explicitly requesting microphone permission, but on mobile (e.g. Android) you must request it, otherwise recognition fails to start and you may see an error such as: Exception with an error code: 0x15
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
System.ApplicationException: Exception with an error code: 0x15
  at Microsoft.CognitiveServices.Speech.Internal.SpxExceptionThrower.ThrowIfFail (System.IntPtr hr) [0x00000] in <00000000000000000000000000000000>:0
  at Microsoft.CognitiveServices.Speech.Recognizer.StartContinuousRecognition () [0x00000] in <00000000000000000000000000000000>:0
  at Microsoft.CognitiveServices.Speech.Recognizer.DoAsyncRecognitionAction (System.Action recoImplAction) [0x00000] in <00000000000000000000000000000000>:0
  at System.Threading.Tasks.Task.Execute () [0x00000] in <00000000000000000000000000000000>:0
  at System.Threading.ExecutionContext.RunInternal (System.Threading.ExecutionContext executionContext, System.Threading.ContextCallback callback, System.Object state, System.Boolean preserveSyncCtx) [0x00000] in <00000000000000000000000000000000>:0
  at System.Threading.Tasks.Task.ExecuteWithThreadLocal (System.Threading.Tasks.Task& currentTaskSlot) [0x00000] in
4. Implementation Steps
1. Download and import the SDK package.
2. Set up a simple test scene.
3. Write the scripts below and test the speech recognition feature.
4. Add the test script to the scene and assign its references in the Inspector.
5. On mobile (e.g. Android), add the microphone permission shown below; a custom-manifest sketch follows it.
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
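If your project uses a custom Android manifest, the permission goes in Assets/Plugins/Android/AndroidManifest.xml. A minimal sketch is below, assuming a recent Unity version; prefer starting from the manifest template your Unity version exports rather than copying this verbatim:
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
  <!-- Required so the Speech SDK can capture from the microphone -->
  <uses-permission android:name="android.permission.RECORD_AUDIO"/>
  <application>
    <activity android:name="com.unity3d.player.UnityPlayerActivity"
              android:theme="@style/UnityThemeSelector">
      <intent-filter>
        <action android:name="android.intent.action.MAIN"/>
        <category android:name="android.intent.category.LAUNCHER"/>
      </intent-filter>
    </activity>
  </application>
</manifest>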
6. Run the scene and click the corresponding buttons to start recognition; the results show up in the Console.
5. Key Scripts
1. TestSpeechRecognitionHandler
using UnityEngine;
using UnityEngine.Android;
using UnityEngine.UI;
public class TestSpeechRecognitionHandler : MonoBehaviour
{
#region Data
/// <summary>
/// Buttons and text
/// </summary>
public Button QuitButton;
public Button ASRButton;
public Button StopASRButton;
public Text ASRText;
/// <summary>
/// m_SpeechAndKeywordRecognitionHandler
/// </summary>
SpeechRecognitionHandler m_SpeechAndKeywordRecognitionHandler;
#endregion
#region Lifecycle function
/// <summary>
/// Start
/// </summary>
void Start()
{
QuitButton.onClick.AddListener(OnClickQuitButton);
ASRButton.onClick.AddListener(OnClickASRButton);
StopASRButton.onClick.AddListener(OnClickStopASRButton);
// Request microphone permission
RequestMicrophonePermission();
}
/// <summary>
/// Called when the application quits
/// </summary>
async void OnApplicationQuit() {
await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();
}
#endregion
#region Private function
/// <summary>
/// RequestMicrophonePermission
/// </summary>
void RequestMicrophonePermission()
{
// Check whether the current platform is Android
if (Application.platform == RuntimePlatform.Android)
{
// Check whether microphone permission has already been granted
if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
{
// If not, ask the user to grant it
Permission.RequestUserPermission(Permission.Microphone);
}
}
else
{
// On other platforms, run platform-specific logic here if needed
Debug.LogWarning("Microphone permission is not needed on this platform.");
}
SpeechInitialized();
}
/// <summary>
/// SpeechInitialized
/// </summary>
private void SpeechInitialized() {
ASRText.text = "";
m_SpeechAndKeywordRecognitionHandler = new SpeechRecognitionHandler();
m_SpeechAndKeywordRecognitionHandler.onRecognizingAction = (str) => { Debug.Log("onRecognizingAction: " + str); };
m_SpeechAndKeywordRecognitionHandler.onRecognizedSpeechAction = (str) => { Loom.QueueOnMainThread(() => ASRText.text += str); Debug.Log("onRecognizedSpeechAction: " + str); };
m_SpeechAndKeywordRecognitionHandler.onErrorAction = (str) => { Debug.Log("onErrorAction: " + str); };
m_SpeechAndKeywordRecognitionHandler.Initialized();
}
/// <summary>
/// OnClickQuitButton
/// </summary>
private void OnClickQuitButton() {
#if UNITY_EDITOR
UnityEditor.EditorApplication.isPlaying = false;
#else
Application.Quit();
#endif
}
/// <summary>
/// OnClickASRButton
/// </summary>
private void OnClickASRButton() {
m_SpeechAndKeywordRecognitionHandler.StartContinuousRecognizer();
}
/// <summary>
/// OnClickStopASRButton
/// </summary>
private async void OnClickStopASRButton()
{
await m_SpeechAndKeywordRecognitionHandler.StopContinuousRecognizer();
}
#endregion
}
2. SpeechRecognitionHandler
using UnityEngine;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using System;
using Task = System.Threading.Tasks.Task;
/// <summary>
/// Continuous speech-to-text recognition handler
/// </summary>
public class SpeechRecognitionHandler
{
#region Data
/// <summary>
///
/// </summary>
const string TAG = "[SpeechRecognitionHandler] ";
/// <summary>
/// Speech recognition config
/// </summary>
private SpeechConfig m_SpeechConfig;
/// <summary>
/// Audio input config
/// </summary>
private AudioConfig m_AudioConfig;
/// <summary>
/// Speech recognizer
/// </summary>
private SpeechRecognizer m_SpeechRecognizer;
/// <summary>
/// ASR config
/// </summary>
private ASRConfig m_ASRConfig;
/// <summary>
/// Recognition events
/// </summary>
public Action<string> onRecognizingAction;
public Action<string> onRecognizedSpeechAction;
public Action<string> onErrorAction;
public Action<string> onSessionStoppedAction;
#endregion
#region Public function
/// <summary>
/// Initialize
/// </summary>
/// <returns></returns>
public async void Initialized()
{
m_ASRConfig = new ASRConfig();
Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE " + m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE);
Debug.Log(TAG + "m_LLMConfig.AZURE_SPEECH_REGION " + m_ASRConfig.AZURE_SPEECH_REGION);
m_SpeechConfig = SpeechConfig.FromSubscription(m_ASRConfig.AZURE_SPEECH_KEY, m_ASRConfig.AZURE_SPEECH_REGION);
m_SpeechConfig.SpeechRecognitionLanguage = m_ASRConfig.AZURE_SPEECH_RECOGNITION_LANGUAGE;
m_AudioConfig = AudioConfig.FromDefaultMicrophoneInput();
Debug.Log(TAG + " Initialized 2 ====");
// Optional delay; adjust or remove as needed
await Task.Delay(100);
}
#endregion
#region Private function
/// <summary>
/// Register the recognition callbacks
/// </summary>
private void SetRecognizeCallback()
{
Debug.Log(TAG + " SetRecoginzeCallback == ");
if (m_SpeechRecognizer != null)
{
m_SpeechRecognizer.Recognizing += OnRecognizing;
m_SpeechRecognizer.Recognized += OnRecognized;
m_SpeechRecognizer.Canceled += OnCanceled;
m_SpeechRecognizer.SessionStopped += OnSessionStopped;
Debug.Log(TAG+" SetRecoginzeCallback OK ");
}
}
#endregion
#region Callback
/// <summary>
/// Recognizing (intermediate result)
/// </summary>
/// <param name="s"></param>
/// <param name="e"></param>
private void OnRecognizing(object s, SpeechRecognitionEventArgs e)
{
Debug.Log(TAG + "RecognizingSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);
if (e.Result.Reason == ResultReason.RecognizingSpeech )
{
Debug.Log(TAG + " Trigger onRecognizingAction is null :" + onRecognizingAction == null);
onRecognizingAction?.Invoke(e.Result.Text);
}
}
/// <summary>
/// Recognized (final result)
/// </summary>
/// <param name="s"></param>
/// <param name="e"></param>
private void OnRecognized(object s, SpeechRecognitionEventArgs e)
{
Debug.Log(TAG + "RecognizedSpeech:" + e.Result.Text + " :[e.Result.Reason]:" + e.Result.Reason);
if (e.Result.Reason == ResultReason.RecognizedSpeech )
{
Debug.Log(TAG + " onRecognizedSpeechAction is null: " + (onRecognizedSpeechAction == null));
onRecognizedSpeechAction?.Invoke(e.Result.Text);
}
}
/// <summary>
/// Recognition canceled
/// </summary>
/// <param name="s"></param>
/// <param name="e"></param>
private void OnCanceled(object s, SpeechRecognitionCanceledEventArgs e)
{
Debug.LogFormat(TAG+"Canceled: Reason={0}", e.Reason );
if (e.Reason == CancellationReason.Error)
{
onErrorAction?.Invoke(e.ErrorDetails);
}
}
/// <summary>
/// Session stopped
/// </summary>
/// <param name="s"></param>
/// <param name="e"></param>
private void OnSessionStopped(object s, SessionEventArgs e)
{
Debug.Log(TAG+"Session stopped event." );
onSessionStoppedAction?.Invoke("Session stopped event.");
}
#endregion
#region Continuous speech-to-text
/// <summary>
/// Start continuous speech-to-text recognition
/// </summary>
public void StartContinuousRecognizer()
{
Debug.LogWarning(TAG + "StartContinuousRecognizer");
try
{
// Move the work to a background thread (adjust to your needs)
Loom.RunAsync(async () => {
try
{
// Dispose any previous recognizer, then create a fresh one.
if (m_SpeechRecognizer != null)
{
m_SpeechRecognizer.Dispose();
m_SpeechRecognizer = null;
}
m_SpeechRecognizer = new SpeechRecognizer(m_SpeechConfig, m_AudioConfig);
SetRecognizeCallback();
await m_SpeechRecognizer.StartContinuousRecognitionAsync().ConfigureAwait(false);
Loom.QueueOnMainThread(() => {
Debug.LogWarning(TAG + "StartContinuousRecognizer QueueOnMainThread ok");
});
Debug.LogWarning(TAG + "StartContinuousRecognizer RunAsync ok");
}
catch (Exception e)
{
Loom.QueueOnMainThread(() =>
{
Debug.LogError(TAG + " StartContinuousRecognizer 0 " + e);
});
}
});
}
catch (Exception e)
{
Debug.LogError(TAG + " StartContinuousRecognizer 1 " + e);
}
}
/// <summary>
/// Stop continuous speech-to-text recognition
/// </summary>
public async Task StopContinuousRecognizer()
{
try
{
if (m_SpeechRecognizer != null)
{
await m_SpeechRecognizer.StopContinuousRecognitionAsync().ConfigureAwait(false);
//m_SpeechRecognizer.Dispose();
//m_SpeechRecognizer = null;
Debug.LogWarning(TAG + " StopContinuousRecognizer");
}
}
catch (Exception e)
{
Debug.LogError(TAG + " StopContinuousRecognizer Exception : " + e);
}
}
#endregion
}
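Both scripts above call into a Loom helper (Loom.RunAsync / Loom.QueueOnMainThread), a common Unity main-thread dispatcher that is not part of this article. For reference, here is a minimal sketch of just the two members used; it assumes Loom.Initialize() is called once from the main thread (e.g. in a Start method) before any background work is queued, and your actual helper may differ:
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using UnityEngine;

// Minimal main-thread dispatcher sketch: only RunAsync and QueueOnMainThread.
public class Loom : MonoBehaviour
{
    static Loom s_Instance;
    static readonly Queue<Action> s_Actions = new Queue<Action>();

    // Create the hidden driver object once, from the main thread.
    public static void Initialize()
    {
        if (s_Instance == null)
        {
            var go = new GameObject("[Loom]");
            s_Instance = go.AddComponent<Loom>();
            DontDestroyOnLoad(go);
        }
    }

    // Run work on a background thread (fire-and-forget).
    public static void RunAsync(Action action)
    {
        Task.Run(action);
    }

    // Queue work to be executed on the main thread in Update().
    public static void QueueOnMainThread(Action action)
    {
        lock (s_Actions) { s_Actions.Enqueue(action); }
    }

    void Update()
    {
        // Drain all queued actions on the main thread.
        while (true)
        {
            Action action = null;
            lock (s_Actions)
            {
                if (s_Actions.Count == 0) break;
                action = s_Actions.Dequeue();
            }
            action?.Invoke();
        }
    }
}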
3. ASRConfig
public class ASRConfig
{
#region Azure ASR
/// <summary>
/// AZURE_SPEECH_KEY
/// </summary>
public virtual string AZURE_SPEECH_KEY { get; } = @"You_Key";
/// <summary>
/// AZURE_SPEECH_REGION
/// </summary>
public virtual string AZURE_SPEECH_REGION { get; } = @"eastasia";
/// <summary>
/// AZURE_SPEECH_RECOGNITION_LANGUAGE
/// </summary>
public virtual string AZURE_SPEECH_RECOGNITION_LANGUAGE { get; } = @"zh-CN";
#endregion
}
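Since the three properties are virtual, swapping the key, region, or language per project or per build only takes a small subclass. A hypothetical example that switches recognition to English:
// Hypothetical subclass: override just what differs.
public class EnglishASRConfig : ASRConfig
{
    public override string AZURE_SPEECH_RECOGNITION_LANGUAGE => @"en-US";
}
You would then construct EnglishASRConfig instead of ASRConfig inside SpeechRecognitionHandler.Initialized().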
That wraps up this brief guide to Azure (Microsoft) continuous speech recognition (ASR) in Unity.