egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01.yaml
@@ -1,3 +1,4 @@ init: xavier_uniform model: sond model_conf: lsm_weight: 0.0 @@ -98,7 +99,7 @@ num_workers: 8 max_epoch: 20 num_iters_per_epoch: 10000 keep_nbest_models: 20 keep_nbest_models: 5 # optimization related accum_grad: 1 egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase2.yaml
@@ -1,3 +1,4 @@ init: xavier_uniform model: sond model_conf: lsm_weight: 0.0 @@ -98,7 +99,7 @@ num_workers: 8 max_epoch: 30 num_iters_per_epoch: 10000 keep_nbest_models: 30 keep_nbest_models: 5 # optimization related accum_grad: 1 egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase3.yaml
@@ -1,3 +1,4 @@ init: xavier_uniform model: sond model_conf: lsm_weight: 0.0 @@ -96,7 +97,7 @@ # 6 samples batch_size: 6 num_workers: 8 max_epoch: 12 max_epoch: 10 num_iters_per_epoch: 300 keep_nbest_models: 5 egs/callhome/TOLD/soap/run.sh
@@ -8,7 +8,7 @@ # [2] Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis, EMNLP 2022 # We recommend you run this script stage by stage. # [developing] This recipe includes: # This recipe includes: # 1. simulating data with switchboard and NIST. # 2. training the model from scratch for 3 stages: # 2-1. pre-train on simu_swbd_sre @@ -18,6 +18,7 @@ # Finally, you will get a similar DER result claimed in the paper. # environment configuration # path/to/kaldi kaldi_root= if [ -z "${kaldi_root}" ]; then @@ -34,21 +35,35 @@ ln -s ${kaldi_root}/egs/callhome_diarization/v2/utils ./utils fi # path to Switchboard and NIST including: # LDC98S75, LDC99S79, LDC2002S06, LDC2001S13, LDC2004S07 data_root= if [ -z "${data_root}" ]; then echo "We need Switchboard and NIST to simulate data for pretraining." echo "If you can't get them, please use 'finetune.sh' to finetune a pretrained model." exit; fi # path/to/NIST/LDC2001S97 callhome_root= if [ -z "${callhome_root}" ]; then echo "We need callhome corpus for training." 
echo "If you want inference only, please refer https://www.modelscope.cn/models/damo/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/summary" exit; fi # machines configuration gpu_devices="4,5,6,7" # for V100-16G, use 4 GPUs gpu_num=4 count=1 # general configuration stage=3 stop_stage=3 stage=0 stop_stage=19 # number of jobs for data process nj=16 sr=8000 # dataset related data_root= callhome_root=path/to/NIST/LDC2001S97 # experiment configuration lang=en @@ -68,16 +83,16 @@ freeze_param= # inference related inference_model=valid.der.ave_5best.pth inference_model=valid.der.ave_5best.pb inference_config=conf/basic_inference.yaml inference_tag="" test_sets="callhome1" test_sets="callhome2" gpu_inference=true # Whether to perform gpu decoding, set false for cpu decoding # number of jobs for inference # for gpu decoding, inference_nj=ngpu*njob; for cpu decoding, inference_nj=njob njob=5 njob=4 infer_cmd=utils/run.pl told_max_iter=2 told_max_iter=4 . utils/parse_options.sh || exit 1; @@ -127,6 +142,22 @@ # 3. Prepare the Callhome portion of NIST SRE 2000. local/make_callhome.sh ${callhome_root} ${datadir}/ # 4. split ref.rttm for dset in callhome1 callhome2; do rm -rf ${datadir}/${dset}/ref.rttm for name in `awk '{print $1}' ${datadir}/${dset}/wav.scp`; do grep ${name} ${datadir}/callhome/fullref.rttm >> ${datadir}/${dset}/ref.rttm; done # filter out records which don't have rttm labels. 
awk '{print $2}' ${datadir}/${dset}/ref.rttm | sort | uniq > ${datadir}/${dset}/uttid mv ${datadir}/${dset}/wav.scp ${datadir}/${dset}/wav.scp.bak awk '{if (NR==FNR){a[$1]=1}else{if (a[$1]==1){print $0}}}' ${datadir}/${dset}/uttid ${datadir}/${dset}/wav.scp.bak > ${datadir}/${dset}/wav.scp mkdir ${datadir}/${dset}/raw mv ${datadir}/${dset}/{reco2num_spk,segments,spk2utt,utt2spk,uttid,wav.scp.bak} ${datadir}/${dset}/raw/ awk '{print $1,$1}' ${datadir}/${dset}/wav.scp > ${datadir}/${dset}/utt2spk done fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then @@ -156,10 +187,10 @@ mkdir -p ${dumpdir}/${dset}/nonoverlap_0s python -Wignore script/extract_nonoverlap_segments.py \ ${datadir}/${dset}/wav.scp ${datadir}/${dset}/ref.rttm ${dumpdir}/${dset}/nonoverlap_0s \ --min_dur 0 --max_spk_num 8 --sr ${sr} --no_pbar --nj ${nj} --min_dur 0.1 --max_spk_num 8 --sr ${sr} --no_pbar --nj ${nj} mkdir -p ${datadir}/${dset}/nonoverlap_0s find `pwd`/${dumpdir}/${dset}/nonoverlap_0s | sort | awk -F'[/.]' '{print $(NF-1),$0}' > ${datadir}/${dset}/nonoverlap_0s/wav.scp find ${dumpdir}/${dset}/nonoverlap_0s/ -iname "*.wav" | sort | awk -F'[/.]' '{print $(NF-1),$0}' > ${datadir}/${dset}/nonoverlap_0s/wav.scp awk -F'[/.]' '{print $(NF-1),$(NF-2)}' ${datadir}/${dset}/nonoverlap_0s/wav.scp > ${datadir}/${dset}/nonoverlap_0s/utt2spk echo "Done." done @@ -279,11 +310,16 @@ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then echo "Stage 6: Extract speaker embeddings." git lfs install git clone https://www.modelscope.cn/damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch.git mv speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch ${expdir}/ sv_exp_dir=exp/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch if [ ! -e ${sv_exp_dir} ]; then echo "start to download sv models" git lfs install git clone https://www.modelscope.cn/damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch.git mv speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch ${expdir}/ echo "Done." 
fi sed "s/input_size: null/input_size: 80/g" ${sv_exp_dir}/sv.yaml > ${sv_exp_dir}/sv_fbank.yaml for dset in swbd_sre/none_silence callhome1/nonoverlap_0s callhome2/nonoverlap_0s; do key_file=${datadir}/${dset}/feats.scp @@ -301,6 +337,7 @@ ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/sv_inference.JOB.log \ python -m funasr.bin.sv_inference_launch \ --batch_size 1 \ --njob ${njob} \ --ngpu "${_ngpu}" \ --gpuid_list ${gpuid_list} \ --data_path_and_name_and_type "${key_file},speech,kaldi_ark" \ @@ -321,7 +358,7 @@ python -Wignore script/calc_real_meeting_frame_labels.py \ ${datadir}/${dset} ${dumpdir}/${dset}/labels \ --n_spk 8 --frame_shift 0.01 --nj 16 --sr 8000 find `pwd`/${dumpdir}/${dset}/labels -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp find `pwd`/${dumpdir}/${dset}/labels/ -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp done fi @@ -362,7 +399,7 @@ echo "Stage 8: start to dump for callhome1." python -Wignore script/dump_meeting_chunks.py --dir ${data_dir} \ --out ${dumpdir}/callhome1/dumped_files/data --n_spk 16 --no_pbar --sr 8000 --mode test \ --out ${dumpdir}/callhome1/dumped_files/data --n_spk 16 --no_pbar --sr 8000 --mode train \ --chunk_size 1600 --chunk_shift 400 --add_mid_to_speaker true mkdir -p ${datadir}/callhome1/dumped_files @@ -507,8 +544,8 @@ done fi # Scoring for pretrained model, you may get a DER like 13.73 16.25 # 13.73: with oracle VAD, 16.25: with only SOND outputs, aka, system VAD. # Scoring for pretrained model, you may get a DER like 13.29 16.54 # 13.29: with oracle VAD, 16.54: with only SOND outputs, aka, system VAD. if [ ${stage} -le 12 ] && [ ${stop_stage} -ge 12 ]; then echo "stage 12: Scoring phase-1 models" if [ ! 
-e dscore ]; then @@ -588,7 +625,7 @@ --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/profile.scp,profile,kaldi_ark \ --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/label.scp,binary_labels,kaldi_ark \ --valid_shape_file ${expdir}/${valid_set}_states/speech_shape \ --init_param exp/${model_dir}/valid.der.ave_5best.pth \ --init_param exp/${model_dir}/valid.der.ave_5best.pb \ --unused_parameters true \ ${init_opt} \ ${freeze_opt} \ @@ -654,8 +691,8 @@ done fi # Scoring for pretrained model, you may get a DER like 11.25 15.30 # 11.25: with oracle VAD, 15.30: with only SOND outputs, aka, system VAD. # Scoring for pretrained model, you may get a DER like 11.54 15.41 # 11.54: with oracle VAD, 15.41: with only SOND outputs, aka, system VAD. if [ ${stage} -le 15 ] && [ ${stop_stage} -ge 15 ]; then echo "stage 15: Scoring phase-2 models" if [ ! -e dscore ]; then @@ -733,7 +770,7 @@ --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/profile.scp,profile,kaldi_ark \ --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/label.scp,binary_labels,kaldi_ark \ --valid_shape_file ${expdir}/${valid_set}_states/speech_shape \ --init_param exp/${model_dir}_phase2/valid.forward_steps.ave_5best.pth \ --init_param exp/${model_dir}_phase2/valid.forward_steps.ave_5best.pb \ --unused_parameters true \ ${init_opt} \ ${freeze_opt} \ funasr/runtime/docs/SDK_advanced_guide_online_zh.md
@@ -28,6 +28,8 @@ --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \ --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx > log.out 2>&1 & # 如果想关闭ssl,增加参数:--certfile "" --keyfile "" ``` 服务端详细参数介绍可参考[服务端参数介绍](#服务端参数介绍) ### 客户端测试与使用 funasr/runtime/run_server.sh
@@ -12,6 +12,18 @@
. ../../egs/aishell/transformer/utils/parse_options.sh || exit 1;
cd /workspace/FunASR/funasr/runtime/websocket/build/bin
# Start the server without TLS when certfile is empty or the literal "0";
# otherwise pass the configured certificate and key through.
# The comparison is a string test: certfile normally holds a file path, and the
# integer operator -eq makes `[` print "integer expression expected" for it.
if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
./funasr-wss-server \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
  --vad-dir ${vad_dir} \
  --punc-dir ${punc_dir} \
  --decoder-thread-num ${decoder_thread_num} \
  --io-thread-num ${io_thread_num} \
  --port ${port} \
  --certfile "" \
  --keyfile ""
else
./funasr-wss-server \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
@@ -22,4 +34,4 @@
  --port ${port} \
  --certfile ${certfile} \
  --keyfile ${keyfile}
fi
funasr/runtime/run_server_2pass.sh
@@ -13,6 +13,19 @@
. ../../egs/aishell/transformer/utils/parse_options.sh || exit 1;
cd /workspace/FunASR/funasr/runtime/websocket/build/bin
# Start the 2-pass server without TLS when certfile is empty or the literal "0";
# otherwise pass the configured certificate and key through.
# The comparison is a string test: certfile normally holds a file path, and the
# integer operator -eq makes `[` print "integer expression expected" for it.
if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
./funasr-wss-server-2pass \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
  --online-model-dir ${online_model_dir} \
  --vad-dir ${vad_dir} \
  --punc-dir ${punc_dir} \
  --decoder-thread-num ${decoder_thread_num} \
  --io-thread-num ${io_thread_num} \
  --port ${port} \
  --certfile "" \
  --keyfile ""
else
./funasr-wss-server-2pass \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
@@ -24,4 +37,4 @@
  --port ${port} \
  --certfile ${certfile} \
  --keyfile ${keyfile}
fi
funasr/runtime/wss-client/FunASRClient_CShape.sln
New file @@ -0,0 +1,31 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.6.33829.357 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FunASRWSClient_Offline", "FunASRWSClient_Offline\FunASRWSClient_Offline.csproj", "{E0986CC4-D443-44E2-96E8-F6E4B691CA57}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FunASRWSClient_Online", "FunASRWSClient_Online\FunASRWSClient_Online.csproj", "{11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Debug|Any CPU.Build.0 = Debug|Any CPU {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Release|Any CPU.ActiveCfg = Release|Any CPU {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Release|Any CPU.Build.0 = Release|Any CPU {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Debug|Any CPU.Build.0 = Debug|Any CPU {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Release|Any CPU.ActiveCfg = Release|Any CPU {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {E8483245-31D3-4C42-AAF9-B3195EAB97C2} EndGlobalSection EndGlobal funasr/runtime/wss-client/FunASRClient_CShape.suoBinary files differ
funasr/runtime/wss-client/FunASRWSClient_Offline/FunASRWSClient_Offline.csproj
New file @@ -0,0 +1,14 @@ <Project Sdk="Microsoft.NET.Sdk"> <PropertyGroup> <OutputType>Exe</OutputType> <TargetFramework>net6.0</TargetFramework> <ImplicitUsings>enable</ImplicitUsings> <Nullable>enable</Nullable> </PropertyGroup> <ItemGroup> <PackageReference Include="Websocket.Client" Version="4.6.1" /> </ItemGroup> </Project> funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs
New file @@ -0,0 +1,85 @@
using System.Collections.Specialized;
using WebSocketSpace;

namespace FunASRWSClient_Offline
{
    /// <summary>
    /// Program entry point.
    /// </summary>
    public class Program
    {
        private static void Main()
        {
            WSClient_Offline m_funasrclient = new WSClient_Offline();
            m_funasrclient.FunASR_Main();
        }
    }

    /// <summary>
    /// Console front end of the offline (file transcription) client: loads the
    /// server address, checks the WebSocket connection, then repeatedly asks
    /// for a file path and hands it to <see cref="CWebSocketClient"/>.
    /// </summary>
    public class WSClient_Offline
    {
        /// <summary>Server host; overridden by the "host" key of config.ini.</summary>
        public static string host = "0.0.0.0";
        /// <summary>Server port; overridden by the "port" key of config.ini.</summary>
        public static string port = "10095";
        private static CWebSocketClient m_websocketclient = new CWebSocketClient();

        /// <summary>
        /// Runs the interactive loop. Exits the process with code 1 (after
        /// printing the reason) when the initial connection test fails.
        /// </summary>
        [STAThread]
        public async void FunASR_Main()
        {
            loadconfig();

            // Initialise the connection; abort with a visible message on failure
            // instead of exiting silently with a success code.
            string commstatus = ClientConnTest();
            if (commstatus != "通信连接成功")
            {
                Console.Error.WriteLine(commstatus);
                Environment.Exit(1);
            }

            // Prompt for files to transcribe until standard input is closed.
            while (true)
            {
                Console.WriteLine("请输入转录文件路径:");
                string? filepath = Console.ReadLine();
                if (filepath == null)
                {
                    // End of input: ReadLine would keep returning null, so stop
                    // prompting (the old loop spun forever here) but keep the
                    // process alive so pending results can still be printed.
                    Thread.Sleep(Timeout.Infinite);
                    return;
                }

                // Tolerate surrounding blanks and the quotes added by "copy as path".
                filepath = filepath.Trim().Trim('"');
                if (filepath.Length == 0)
                    continue;

                await m_websocketclient.ClientSendFileFunc(filepath);
            }
        }

        /// <summary>
        /// Reads "host" and "port" from config.ini (relative to the current
        /// working directory). Blank lines and lines starting with ';' or '#'
        /// are ignored. When the file is missing the built-in defaults are kept.
        /// </summary>
        private void loadconfig()
        {
            string filePath = "config.ini";
            if (!File.Exists(filePath))
            {
                Console.Error.WriteLine($"config.ini not found in {Directory.GetCurrentDirectory()}, using {host}:{port}");
                return;
            }

            using (StreamReader reader = new StreamReader(filePath))
            {
                string? line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Skip empty lines and comments.
                    if (string.IsNullOrEmpty(line) || line.StartsWith(";") || line.StartsWith("#"))
                        continue;

                    // Parse "key=value"; a line without a key is ignored.
                    int equalsIndex = line.IndexOf('=');
                    if (equalsIndex > 0)
                    {
                        string key = line.Substring(0, equalsIndex).Trim();
                        string value = line.Substring(equalsIndex + 1).Trim();
                        if (key == "host")
                            host = value;
                        else if (key == "port")
                            port = value;
                    }
                }
            }
        }

        /// <summary>
        /// Blocks on the WebSocket connection test.
        /// </summary>
        /// <returns>
        /// "通信连接成功" on success, otherwise the status text reported by the
        /// WebSocket client.
        /// </returns>
        private static string ClientConnTest()
        {
            Task<string> websocketstatus = m_websocketclient.ClientConnTest();
            if (websocketstatus != null && websocketstatus.Result.IndexOf("成功") == -1)
                return websocketstatus.Result;
            return "通信连接成功";
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Offline/README.md
New file @@ -0,0 +1,9 @@ # cshape-client-offline 这是一个基于FunASR-Websocket服务器的C#客户端,用于转录本地音频文件。 将配置文件config.ini放在程序运行时的工作目录下(程序按相对路径读取config.ini),并在其中配置服务器ip地址和端口号。 配置好服务端ip和端口号后,在VS中打开项目并添加Websocket.Client的NuGet程序包,即可直接进行测试,按照控制台提示操作即可。 注:本客户端暂仅支持wav文件,在win11下完成测试,编译环境VS2022。 funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs
New file @@ -0,0 +1,120 @@
using Websocket.Client;
using System.Text.Json;
using System.Reactive.Linq;
using FunASRWSClient_Offline;

namespace WebSocketSpace
{
    /// <summary>
    /// WebSocket transport of the offline client: opens the connection, sends
    /// a file as one "offline" session and prints the results that come back.
    /// </summary>
    internal class CWebSocketClient
    {
        // Number of leading bytes dropped from the file before sending.
        // NOTE(review): presumably the canonical 44-byte RIFF/WAV header; files
        // with extra header chunks would need real parsing - confirm.
        private const int WavHeaderBytes = 44;
        // Maximum payload of one binary frame.
        private const int FileChunkBytes = 1024000;

        // Built on first use so the URI is composed from host/port as they are
        // at that moment (after config loading) rather than whenever the
        // runtime happens to run this type's static initialisers.
        private static readonly Lazy<WebsocketClient> lazyClient = new Lazy<WebsocketClient>(
            () => new WebsocketClient(new Uri($"ws://{WSClient_Offline.host}:{WSClient_Offline.port}")));
        private static WebsocketClient client => lazyClient.Value;

        /// <summary>
        /// Wires up the event handlers and starts the connection.
        /// </summary>
        /// <returns>
        /// "WebSocket通信连接成功" when the client is running afterwards,
        /// "WebSocket通信连接失败" otherwise. Exceptions are printed, not thrown.
        /// </returns>
        public async Task<string> ClientConnTest()
        {
            string commstatus = "WebSocket通信连接失败";
            try
            {
                client.Name = "funasr";
                client.ReconnectTimeout = null;
                client.ReconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Reconnection happened, type: {info.Type}, url: {client.Url}"));
                client.DisconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Disconnection happened, type: {info.Type}"));

                client
                    .MessageReceived
                    .Where(msg => msg.Text != null)
                    .Subscribe(msg =>
                    {
                        recmessage(msg.Text);
                    });

                await client.Start();

                if (client.IsRunning)
                    commstatus = "WebSocket通信连接成功";
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                client.Dispose();
            }

            return commstatus;
        }

        /// <summary>
        /// Transcribes one file: start frame, audio frames, end frame.
        /// Does nothing when the client is not running or the file cannot be
        /// read; all exceptions are printed instead of being thrown.
        /// </summary>
        /// <param name="file_name">Path of the audio file to send.</param>
        /// <returns>An already completed task wrapping a completed task.</returns>
        public Task<Task> ClientSendFileFunc(string file_name)//file transcription
        {
            try
            {
                if (client.IsRunning)
                {
                    // Read first: announcing a session and then failing to read
                    // the file would leave the server waiting for audio.
                    byte[]? audio = FileToByte(file_name);
                    if (audio != null)
                    {
                        string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true}}", Path.GetFileName(file_name));
                        client.Send(firstbuff);
                        showWAVForm(client, audio);
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
            return Task.FromResult(Task.CompletedTask);
        }

        /// <summary>
        /// Prints one server message. Missing or non-string fields are treated
        /// as empty text; malformed JSON is reported on the console.
        /// </summary>
        /// <param name="message">Raw text frame received from the server.</param>
        public void recmessage(string message)
        {
            if (message != null)
            {
                try
                {
                    using (JsonDocument jsonDoc = JsonDocument.Parse(message))
                    {
                        JsonElement root = jsonDoc.RootElement;
                        string text = ReadString(root, "text");
                        string name = ReadString(root, "wav_name");
                        if (name == "asr_stream")
                            Console.WriteLine($"实时识别内容: {text}");
                        else
                            Console.WriteLine($"文件名称:{name} 文件转录内容: {text}");
                    }
                }
                catch (JsonException ex)
                {
                    Console.WriteLine("JSON 解析错误: " + ex.Message);
                }
            }
        }

        // Returns the string value of a property, or "" when the root is not an
        // object, the property is absent, or its value is not a string.
        private static string ReadString(JsonElement obj, string property)
        {
            if (obj.ValueKind == JsonValueKind.Object
                && obj.TryGetProperty(property, out JsonElement value)
                && value.ValueKind == JsonValueKind.String)
            {
                return value.GetString() ?? string.Empty;
            }
            return string.Empty;
        }

        // Sends the audio after the header in frames of at most FileChunkBytes,
        // then the end-of-speech marker.
        private void showWAVForm(WebsocketClient client, byte[] fileBytes)
        {
            for (int offset = Math.Min(WavHeaderBytes, fileBytes.Length); offset < fileBytes.Length; offset += FileChunkBytes)
            {
                int count = Math.Min(FileChunkBytes, fileBytes.Length - offset);
                byte[] send = new byte[count];
                Array.Copy(fileBytes, offset, send, 0, count);
                client.Send(send);
                Thread.Sleep(5);
            }
            Thread.Sleep(10);
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Reads a whole file into memory.
        /// </summary>
        /// <param name="fileUrl">Path of the file.</param>
        /// <returns>The file content, or null when it cannot be read (the reason is printed).</returns>
        public byte[]? FileToByte(string fileUrl)
        {
            try
            {
                // ReadAllBytes loops until the whole file is read; a single
                // Stream.Read call is allowed to return fewer bytes.
                return File.ReadAllBytes(fileUrl);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Cannot read file '{fileUrl}': {ex.Message}");
                return null;
            }
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/FunASRWSClient_Online.csproj
New file @@ -0,0 +1,15 @@ <Project Sdk="Microsoft.NET.Sdk"> <PropertyGroup> <OutputType>Exe</OutputType> <TargetFramework>net6.0</TargetFramework> <ImplicitUsings>enable</ImplicitUsings> <Nullable>enable</Nullable> </PropertyGroup> <ItemGroup> <PackageReference Include="NAudio" Version="2.1.0" /> <PackageReference Include="Websocket.Client" Version="4.6.1" /> </ItemGroup> </Project> funasr/runtime/wss-client/FunASRWSClient_Online/Program.cs
New file @@ -0,0 +1,255 @@
using AliFsmnVadSharp;
using NAudio.Wave;
using System.Collections.Concurrent;
using WebSocketSpace;
using NAudio.CoreAudioApi;
using System.IO;
using System.Collections.Specialized;

namespace FunASRWSClient_Online
{
    /// <summary>
    /// Program entry point.
    /// </summary>
    public class Program
    {
        private static void Main()
        {
            WSClient_Online m_funasrclient = new WSClient_Online();
            m_funasrclient.FunASR_Main();
        }
    }

    /// <summary>
    /// Console front end of the online client: checks microphone and
    /// connection, starts the sender threads and runs the menu loop.
    /// </summary>
    public class WSClient_Online
    {
        /// <summary>Server host; overridden by the "host" key of config.ini.</summary>
        public static string host = "0.0.0.0";
        /// <summary>Server port; overridden by the "port" key of config.ini.</summary>
        public static string port = "10095";
        /// <summary>Mode of the most recent request: "offline", "online" or "2pass".</summary>
        public static string onlineasrmode = string.Empty;
        private static WaveCollect m_wavecollect = new WaveCollect();
        private static CWebSocketClient m_websocketclient = new CWebSocketClient();
        /// <summary>Captured microphone buffers waiting to be sent.</summary>
        public static readonly ConcurrentQueue<byte[]> ActiveAudioSet = new ConcurrentQueue<byte[]>();
        /// <summary>Paths of files waiting to be transcribed.</summary>
        public static readonly ConcurrentQueue<string> AudioFileQueue = new ConcurrentQueue<string>();

        /// <summary>
        /// Runs the client. Exits the process with code 1 (after printing the
        /// reason) when the microphone or the connection check fails.
        /// </summary>
        [STAThread]
        public void FunASR_Main()
        {
            loadconfig();

            // Microphone state, queried once.
            string errorStatus = string.Empty;
            int micVolume = GetCurrentMicVolume();
            if (micVolume == -2)
                errorStatus = "注意:麦克风被设置为静音!";
            else if (micVolume == -1)
                errorStatus = "注意:麦克风未连接!";
            else if (micVolume == 0)
                errorStatus = "注意:麦克风声音设置为0!";

            // Connection state.
            string commstatus = ClientConnTest();
            if (commstatus != "通信连接成功")
                errorStatus = commstatus;

            // Report the startup problem instead of exiting silently with success.
            if (errorStatus != string.Empty)
            {
                Console.Error.WriteLine(errorStatus);
                Environment.Exit(1);
            }

            // Thread that forwards microphone audio to the server.
            Thread SendAudioThread = new Thread(SendAudioToSeverAsync);
            SendAudioThread.Start();

            // Thread that sends queued files to the server.
            Thread AudioFileThread = new Thread(SendAudioFileToSeverAsync);
            AudioFileThread.Start();

            while (true)
            {
                Console.WriteLine("请选择语音识别方式:1.离线文件转写;2.实时语音识别");
                string? str = Console.ReadLine();
                if (str == null)
                    return; // end of input: stop prompting; the sender threads keep the process alive

                if (str == "1")// file transcription
                {
                    onlineasrmode = "offline";
                    Console.WriteLine("请输入转录文件路径");
                    str = Console.ReadLine();
                    if (str == null)
                        return;
                    if (!string.IsNullOrEmpty(str))
                        AudioFileQueue.Enqueue(str);
                }
                else if (str == "2")// live recognition
                {
                    Console.WriteLine("请输入实时语音识别模式:1.online;2.2pass");
                    str = Console.ReadLine();
                    if (str == null)
                        return;
                    OnlineASR(str);
                }
            }
        }

        /// <summary>
        /// Reads "host" and "port" from config.ini (relative to the current
        /// working directory). Blank lines and lines starting with ';' or '#'
        /// are ignored. When the file is missing the built-in defaults are kept.
        /// </summary>
        private void loadconfig()
        {
            string filePath = "config.ini";
            if (!File.Exists(filePath))
            {
                Console.Error.WriteLine($"config.ini not found in {Directory.GetCurrentDirectory()}, using {host}:{port}");
                return;
            }

            using (StreamReader reader = new StreamReader(filePath))
            {
                string? line;
                while ((line = reader.ReadLine()) != null)
                {
                    // Skip empty lines and comments.
                    if (string.IsNullOrEmpty(line) || line.StartsWith(";") || line.StartsWith("#"))
                        continue;

                    // Parse "key=value"; a line without a key is ignored.
                    int equalsIndex = line.IndexOf('=');
                    if (equalsIndex > 0)
                    {
                        string key = line.Substring(0, equalsIndex).Trim();
                        string value = line.Substring(equalsIndex + 1).Trim();
                        if (key == "host")
                            host = value;
                        else if (key == "port")
                            port = value;
                    }
                }
            }
        }

        /// <summary>
        /// Runs one live recognition session until Ctrl+C is pressed.
        /// </summary>
        /// <param name="str">Menu choice: "1" for online, "2" for 2pass.</param>
        private void OnlineASR(string str)
        {
            // Resolve the mode from this choice only. Falling back to the
            // previous value of onlineasrmode would start streaming in
            // "offline" mode after a file transcription.
            string mode;
            if (str == "1")// live recognition
                mode = "online";
            else if (str == "2")// live recognition with second-pass correction
                mode = "2pass";
            else
            {
                Console.WriteLine("无效的选择,请输入1或2");
                return;
            }
            onlineasrmode = mode;

            // Start recording and announce the session.
            m_wavecollect.StartRec();
            m_websocketclient.ClientFirstConnOnline(onlineasrmode);

            bool restoreCtrlC = false;
            bool previousCtrlC = false;
            try
            {
                // Without this, Ctrl+C is delivered as an interrupt and never
                // shows up in Console.ReadKey, so the session could not be
                // stopped from the keyboard.
                previousCtrlC = Console.TreatControlCAsInput;
                Console.TreatControlCAsInput = true;
                restoreCtrlC = true;

                while (true)
                {
                    if (WaveCollect.voicebuff.TryDequeue(out byte[]? buff))
                    {
                        if (buff != null)
                            ActiveAudioSet.Enqueue(buff);
                    }
                    else if (Console.KeyAvailable)
                    {
                        var key = Console.ReadKey(true);

                        // Ctrl+C ends the session.
                        if ((key.Modifiers & ConsoleModifiers.Control) != 0 && key.Key == ConsoleKey.C)
                        {
                            Console.WriteLine("Ctrl+C Pressed!");
                            break;
                        }
                    }
                    else
                    {
                        Thread.Sleep(10);
                    }
                }
            }
            catch
            {
                Console.WriteLine("实时识别出现异常!");
            }
            finally
            {
                if (restoreCtrlC)
                    Console.TreatControlCAsInput = previousCtrlC;
                m_wavecollect.StopRec();
                m_websocketclient.ClientLastConnOnline();
            }
        }

        /// <summary>
        /// Blocks on the WebSocket connection test.
        /// </summary>
        /// <returns>
        /// "通信连接成功" on success, otherwise the status text reported by the
        /// WebSocket client.
        /// </returns>
        private string ClientConnTest()
        {
            Task<string> websocketstatus = m_websocketclient.ClientConnTest();
            if (websocketstatus != null && websocketstatus.Result.IndexOf("成功") == -1)
                return websocketstatus.Result;
            return "通信连接成功";
        }

        /// <summary>
        /// Worker loop: once per second takes one queued file path, sends the
        /// file and reports a non-zero result code to the user.
        /// </summary>
        private void SendAudioFileToSeverAsync()
        {
            while (true)
            {
                Thread.Sleep(1000);
                if (AudioFileQueue.TryDequeue(out string? filepath))
                {
                    if (!string.IsNullOrEmpty(filepath))
                    {
                        // 0: sent, -1: unsupported file type, -2: connection down.
                        int result = m_websocketclient.ClientSendFileFunc(filepath);
                        if (result == -1)
                            Console.WriteLine($"文件类型不支持: {filepath}");
                        else if (result == -2)
                            Console.WriteLine($"通信断开,文件未发送: {filepath}");
                        else if (result != 0)
                            Console.WriteLine($"文件发送失败({result}): {filepath}");
                    }
                }
                else
                {
                    Thread.Sleep(100);
                }
            }
        }

        /// <summary>
        /// Worker loop: forwards captured microphone buffers to the server.
        /// </summary>
        private void SendAudioToSeverAsync()
        {
            while (true)
            {
                if (ActiveAudioSet.TryDequeue(out byte[]? audio))
                {
                    if (audio == null)
                        continue;

                    // Send a private copy, as before: the producer may hand over
                    // a buffer it goes on using while the chunks are being sent.
                    byte[] mArray = (byte[])audio.Clone();
                    m_websocketclient.ClientSendAudioFunc(mArray);
                }
                else
                {
                    Thread.Sleep(10);
                }
            }
        }

        /// <summary>
        /// Writes PCM data to a WAV file with the given format.
        /// </summary>
        private void SaveAsWav(byte[] pcmData, string fileName, int sampleRate, int bitsPerSample, int channels)
        {
            using (var writer = new WaveFileWriter(fileName, new WaveFormat(sampleRate, bitsPerSample, channels)))
            {
                writer.Write(pcmData, 0, pcmData.Length);
            }
        }

        /// <summary>
        /// Inspects the first active capture device.
        /// </summary>
        /// <returns>
        /// -1 when there is no active capture device, -2 when it is muted,
        /// otherwise its master volume scaled to 0..100.
        /// </returns>
        private int GetCurrentMicVolume()
        {
            var enumerator = new MMDeviceEnumerator();

            // First active audio input device, if any.
            MMDevice? mMDevice = enumerator.EnumerateAudioEndPoints(DataFlow.Capture, DeviceState.Active).FirstOrDefault();
            if (mMDevice == null)
                return -1;
            if (mMDevice.AudioEndpointVolume.Mute)
                return -2;
            return (int)(mMDevice.AudioEndpointVolume.MasterVolumeLevelScalar * 100);
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/README.md
New file @@ -0,0 +1,9 @@ # cshape-client-online 这是一个基于FunASR-Websocket服务器的C#客户端,用于实时语音识别和转录本地音频文件。 将配置文件config.ini放在程序运行时的工作目录下(程序按相对路径读取config.ini),并在其中配置服务器ip地址和端口号。 配置好服务端ip和端口号后,在VS中打开项目并添加NAudio和Websocket.Client的NuGet程序包,即可直接进行测试,按照控制台提示操作即可。 注:实时语音识别使用online或2pass,转录文件默认使用offline,在win11下完成测试,编译环境VS2022。 funasr/runtime/wss-client/FunASRWSClient_Online/WaveCollect.cs
New file @@ -0,0 +1,106 @@
using System.Collections.Concurrent;
using NAudio.Wave;
using NAudio.CoreAudioApi;

namespace AliFsmnVadSharp
{
    /// <summary>
    /// Records from the default input device and publishes each captured
    /// chunk on <see cref="voicebuff"/>.
    /// </summary>
    class WaveCollect
    {
        private string fileName = string.Empty;
        private WaveInEvent? waveSource = null;
        private WaveFileWriter? waveFile = null;
        /// <summary>Length of one capture buffer in milliseconds.</summary>
        public static int wave_buffer_milliseconds = 600;
        /// <summary>Bits per sample of the recording format.</summary>
        public static int wave_buffer_collectbits = 16;
        /// <summary>Channel count of the recording format.</summary>
        public static int wave_buffer_collectchannels = 1;
        /// <summary>Sample rate of the recording format in Hz.</summary>
        public static int wave_buffer_collectfrequency = 16000;
        /// <summary>Captured chunks, oldest first.</summary>
        public static readonly ConcurrentQueue<byte[]> voicebuff = new ConcurrentQueue<byte[]>();

        /// <summary>
        /// Lists the active capture devices on the console, empties the queue
        /// and starts recording (16 bit, 16 kHz, mono with the default
        /// settings). A recording that is still running is stopped first.
        /// </summary>
        public void StartRec()
        {
            // Release a previous session; otherwise its device and the open
            // tmp.wav writer would be leaked and the file could not be reopened.
            StopRec();

            // Print the capture devices and their native formats.
            var captureDevices = new MMDeviceEnumerator().EnumerateAudioEndPoints(DataFlow.Capture, DeviceState.Active);
            foreach (var device in captureDevices)
            {
                Console.WriteLine("Device Name: " + device.FriendlyName);
                // NOTE(review): a loopback capture object is created on an input
                // device only to read its format - confirm this is intended.
                using (var capture = new WasapiLoopbackCapture(device))
                {
                    Console.WriteLine("Device Channels:" + capture.WaveFormat.Channels);
                    Console.WriteLine("Device SampleRate:" + capture.WaveFormat.SampleRate);
                    Console.WriteLine("Device BitsPerSample:" + capture.WaveFormat.BitsPerSample);
                }
            }

            // Drop audio left over from an earlier session.
            while (voicebuff.TryDequeue(out _))
            {
            }

            waveSource = new WaveInEvent();
            waveSource.BufferMilliseconds = wave_buffer_milliseconds;
            waveSource.WaveFormat = new WaveFormat(wave_buffer_collectfrequency, wave_buffer_collectbits, wave_buffer_collectchannels); // 16 bit, 16 kHz, mono
            waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(waveSource_DataAvailable);
            SetFileName(AppDomain.CurrentDomain.BaseDirectory + "tmp.wav");
            waveFile = new WaveFileWriter(fileName, waveSource.WaveFormat);
            waveSource.StartRecording();
        }

        /// <summary>
        /// Stops recording and releases the device and the file writer.
        /// Does nothing when no recording is active.
        /// </summary>
        public void StopRec()
        {
            if (waveSource != null)
            {
                waveSource.StopRecording();
                waveSource.Dispose();
                waveSource = null;
                if (waveFile != null)
                {
                    waveFile.Dispose();
                    waveFile = null;
                }
            }
        }

        /// <summary>Sets the path used for the WAV writer of the next recording.</summary>
        public void SetFileName(string fileName)
        {
            this.fileName = fileName;
        }

        // Capture callback: queues a private copy of the recorded bytes.
        private void waveSource_DataAvailable(object? sender, WaveInEventArgs e)
        {
            WaveFileWriter? writer = waveFile;
            if (writer != null)
            {
                if (e.Buffer != null && e.BytesRecorded > 0)
                {
                    // Only the first BytesRecorded bytes are valid, and the
                    // array belongs to the recorder, which fills it again for
                    // later callbacks. Queueing the array itself would let a
                    // slow consumer read stale or overwritten audio.
                    byte[] chunk = new byte[e.BytesRecorded];
                    Array.Copy(e.Buffer, 0, chunk, 0, e.BytesRecorded);
                    voicebuff.Enqueue(chunk);
                    //waveFile.Write(e.Buffer, 0, e.BytesRecorded);
                    writer.Flush();
                }
            }
        }

        /// <summary>
        /// Removes the oldest captured chunk.
        /// </summary>
        /// <returns>The chunk, or null when the queue is empty.</returns>
        public static byte[] Wavedata_Dequeue()
        {
            byte[] datas;
            voicebuff.TryDequeue(out datas);
            return datas;
        }

        // Releases the recorder and the writer.
        // NOTE(review): nothing visible in this file subscribes this handler.
        private void waveSource_RecordingStopped(object? sender, StoppedEventArgs e)
        {
            if (waveSource != null)
            {
                waveSource.Dispose();
                waveSource = null;
            }

            if (waveFile != null)
            {
                waveFile.Dispose();
                waveFile = null;
            }
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/WebScoketClient.cs
New file @@ -0,0 +1,219 @@
using System.Net.WebSockets;
using Websocket.Client;
using System.Text.Json;
using NAudio.Wave;
using AliFsmnVadSharp;
using System.Reactive.Linq;
using FunASRWSClient_Online;

namespace WebSocketSpace
{
    /// <summary>
    /// WebSocket transport of the online client: live microphone sessions
    /// (online / 2pass) and file transcription, plus result printing.
    /// All frames are queued with direct <c>client.Send</c> calls so they
    /// leave in the order they were produced.
    /// </summary>
    internal class CWebSocketClient
    {
        private static int chunk_interval = 10;
        private static int[] chunk_size = new int[] { 5, 10, 5 };

        // Number of leading bytes dropped from .wav files before sending.
        // NOTE(review): presumably the canonical 44-byte RIFF/WAV header - confirm.
        private const int WavHeaderBytes = 44;

        // Built on first use so the URI is composed from host/port as they are
        // at that moment (after config loading) rather than whenever the
        // runtime happens to run this type's static initialisers.
        private static readonly Lazy<WebsocketClient> lazyClient = new Lazy<WebsocketClient>(
            () => new WebsocketClient(new Uri($"ws://{WSClient_Online.host}:{WSClient_Online.port}")));
        private static WebsocketClient client => lazyClient.Value;

        /// <summary>
        /// Wires up the event handlers and starts the connection.
        /// </summary>
        /// <returns>
        /// "WebSocket通信连接成功" when the client is running afterwards,
        /// "WebSocket通信连接失败" otherwise. Exceptions are printed, not thrown.
        /// </returns>
        public async Task<string> ClientConnTest()
        {
            string commstatus = "WebSocket通信连接失败";
            try
            {
                client.Name = "funasr";
                client.ReconnectTimeout = null;
                client.ReconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Reconnection happened, type: {info.Type}, url: {client.Url}"));
                client.DisconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Disconnection happened, type: {info.Type}"));

                client
                    .MessageReceived
                    .Where(msg => msg.Text != null)
                    .Subscribe(msg =>
                    {
                        rec_message(msg.Text, client);
                    });

                await client.Start();

                if (client.IsRunning)
                    commstatus = "WebSocket通信连接成功";
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                client.Dispose();
            }

            return commstatus;
        }

        /// <summary>
        /// Sends the start frame of a live session.
        /// </summary>
        /// <param name="asrmode">Mode written into the frame.</param>
        /// <returns>true when sent; false when the client is not running (a reconnect is requested).</returns>
        public bool ClientFirstConnOnline(string asrmode)
        {
            if (!client.IsRunning)
            {
                _ = client.Reconnect();
                return false;
            }

            string firstbuff = string.Format("{{\"mode\": \"{0}\", \"chunk_size\": [{1},{2},{3}], \"chunk_interval\": {4}, \"wav_name\": \"microphone\", \"is_speaking\": true}}"
                , asrmode, chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval);
            client.Send(firstbuff);
            return true;
        }

        /// <summary>
        /// Sends one captured buffer as a sequence of small binary frames.
        /// </summary>
        /// <param name="buff">Audio bytes to send.</param>
        /// <returns>true when sent; false when the client is not running (a reconnect is requested).</returns>
        public bool ClientSendAudioFunc(byte[] buff) //live recognition
        {
            if (!client.IsRunning)
            {
                _ = client.Reconnect();
                return false;
            }

            int CHUNK = WaveCollect.wave_buffer_collectfrequency / 1000 * 60 * chunk_size[1] / chunk_interval;
            if (CHUNK <= 0)
                CHUNK = Math.Max(buff.Length, 1); // a non-positive step would never terminate
            SendInChunks(buff, 0, CHUNK, 1);
            return true;
        }

        /// <summary>Sends the end-of-speech frame of a live session.</summary>
        public void ClientLastConnOnline()
        {
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Sends a file for transcription. The extension is matched without
        /// regard to case; the file is read before the session is announced.
        /// </summary>
        /// <param name="file_name">Path of a .wav, .pcm, .mp3 or .mp4 file.</param>
        /// <returns>
        /// 0: sent; -1: unsupported file type; -2: connection down (a
        /// reconnect is requested); -3: the file could not be read.
        /// </returns>
        public int ClientSendFileFunc(string file_name)//file transcription
        {
            string fileExtension = Path.GetExtension(file_name);
            fileExtension = fileExtension.Replace(".", "").ToLowerInvariant();
            bool isPcmLike = fileExtension == "wav" || fileExtension == "pcm";
            bool isCompressed = fileExtension == "mp3" || fileExtension == "mp4";
            if (!isPcmLike && !isCompressed)
                return -1;

            if (!client.IsRunning)
            {
                _ = client.Reconnect();
                return -2;
            }

            byte[]? fileBytes = FileToByte(file_name);
            if (fileBytes == null)
                return -3;

            if (isPcmLike)
            {
                // The mode was spelled "office" here; every other request in
                // this client names the file mode "offline".
                string firstbuff = string.Format("{{\"mode\": \"offline\", \"chunk_size\": [{0},{1},{2}], \"chunk_interval\": {3}, \"wav_name\": \"{4}\", \"is_speaking\": true, \"wav_format\":\"pcm\"}}"
                    , chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval, Path.GetFileName(file_name));
                client.Send(firstbuff);
                if (fileExtension == "wav")
                    showWAVForm(fileBytes);
                else
                    showWAVForm_All(fileBytes);
            }
            else
            {
                // NOTE(review): chunk_size is sent as a string here but as an
                // array in the frame above - confirm which form the server expects.
                string firstbuff = string.Format("{{\"mode\": \"offline\", \"chunk_size\": \"{0},{1},{2}\", \"chunk_interval\": {3}, \"wav_name\": \"{4}\", \"is_speaking\": true, \"wav_format\":\"{5}\"}}"
                    , chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval, Path.GetFileName(file_name), fileExtension);
                client.Send(firstbuff);
                showWAVForm_All(fileBytes);
            }
            return 0;
        }

        private string recbuff = string.Empty;// accumulated corrected text
        private string onlinebuff = string.Empty;// accumulated live text since the last correction

        /// <summary>
        /// Handles one server message. "2pass-online" text is appended to the
        /// live buffer, "2pass-offline" text replaces it with corrected text,
        /// and a final message of a live session clears the corrected buffer.
        /// Missing or wrongly typed fields are treated as empty / false.
        /// </summary>
        /// <param name="message">Raw text frame received from the server.</param>
        /// <param name="client">Client the message arrived on (not used).</param>
        public void rec_message(string message, WebsocketClient client)
        {
            if (message != null)
            {
                try
                {
                    using (JsonDocument jsonDoc = JsonDocument.Parse(message))
                    {
                        JsonElement root = jsonDoc.RootElement;
                        string mode = ReadString(root, "mode");
                        string text = ReadString(root, "text");
                        string name = ReadString(root, "wav_name");
                        bool isfinal = root.ValueKind == JsonValueKind.Object
                            && root.TryGetProperty("is_final", out JsonElement finalFlag)
                            && finalFlag.ValueKind == JsonValueKind.True;

                        //if (name == "microphone")
                        //    Console.WriteLine($"实时识别内容: {text}");
                        //else
                        //    Console.WriteLine($"文件名称:{name} 文件转录内容: {text}");

                        // NOTE(review): messages whose mode is neither of the two
                        // 2pass values are not printed - confirm that is intended.
                        if (mode == "2pass-online" && WSClient_Online.onlineasrmode != "offline")
                        {
                            onlinebuff += text;
                            Console.WriteLine(recbuff + onlinebuff);
                        }
                        else if (mode == "2pass-offline")
                        {
                            recbuff += text;
                            onlinebuff = string.Empty;
                            Console.WriteLine(recbuff);
                        }

                        if (isfinal && WSClient_Online.onlineasrmode != "offline")
                        {
                            recbuff = string.Empty;
                        }
                    }
                }
                catch (JsonException ex)
                {
                    Console.WriteLine("JSON 解析错误: " + ex.Message);
                }
            }
        }

        // Returns the string value of a property, or "" when the root is not an
        // object, the property is absent, or its value is not a string.
        private static string ReadString(JsonElement obj, string property)
        {
            if (obj.ValueKind == JsonValueKind.Object
                && obj.TryGetProperty(property, out JsonElement value)
                && value.ValueKind == JsonValueKind.String)
            {
                return value.GetString() ?? string.Empty;
            }
            return string.Empty;
        }

        // Queues data[start..] as binary frames of at most chunkBytes, pausing
        // pauseMs after each frame.
        private static void SendInChunks(byte[] data, int start, int chunkBytes, int pauseMs)
        {
            for (int offset = Math.Min(start, data.Length); offset < data.Length; offset += chunkBytes)
            {
                int count = Math.Min(chunkBytes, data.Length - offset);
                byte[] send = new byte[count];
                Array.Copy(data, offset, send, 0, count);
                client.Send(send);
                Thread.Sleep(pauseMs);
            }
        }

        // Sends a .wav file without its header, then the end-of-speech frame.
        private void showWAVForm(byte[] fileBytes)
        {
            SendInChunks(fileBytes, WavHeaderBytes, 102400, 5);
            Thread.Sleep(100);
            client.Send("{\"is_speaking\": false}");
        }

        // Sends a file unchanged, then the end-of-speech frame.
        private void showWAVForm_All(byte[] fileBytes)
        {
            SendInChunks(fileBytes, 0, 1024000, 5);
            Thread.Sleep(10);
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Reads a whole file into memory.
        /// </summary>
        /// <param name="fileUrl">Path of the file.</param>
        /// <returns>The file content, or null when it cannot be read (the reason is printed).</returns>
        public byte[]? FileToByte(string fileUrl)
        {
            try
            {
                // ReadAllBytes loops until the whole file is read; a single
                // Stream.Read call is allowed to return fewer bytes.
                return File.ReadAllBytes(fileUrl);
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Cannot read file '{fileUrl}': {ex.Message}");
                return null;
            }
        }
    }
}
funasr/runtime/wss-client/confg/config.ini
New file @@ -0,0 +1,2 @@ host=127.0.0.1 port=10095 funasr/runtime/wss-client/confg/tmp.wavBinary files differ