雾聪
2023-08-17 7d49a7257d658f2a623c9c972a46401dd4c50e52
Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
7个文件已修改
13个文件已添加
992 ■■■■■ 已修改文件
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01.yaml 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase2.yaml 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase3.yaml 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/TOLD/soap/run.sh 87 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/docs/SDK_advanced_guide_online_zh.md 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/run_server.sh 14 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/run_server_2pass.sh 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRClient_CShape.sln 31 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRClient_CShape.suo 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Offline/FunASRWSClient_Offline.csproj 14 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs 85 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Offline/README.md 9 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs 120 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Online/FunASRWSClient_Online.csproj 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Online/Program.cs 255 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Online/README.md 9 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Online/WaveCollect.cs 106 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/FunASRWSClient_Online/WebScoketClient.cs 219 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/confg/config.ini 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/wss-client/confg/tmp.wav 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01.yaml
@@ -1,3 +1,4 @@
init: xavier_uniform
model: sond
model_conf:
    lsm_weight: 0.0
@@ -98,7 +99,7 @@
num_workers: 8
max_epoch: 20
num_iters_per_epoch: 10000
keep_nbest_models: 20
keep_nbest_models: 5
# optimization related
accum_grad: 1
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase2.yaml
@@ -1,3 +1,4 @@
init: xavier_uniform
model: sond
model_conf:
    lsm_weight: 0.0
@@ -98,7 +99,7 @@
num_workers: 8
max_epoch: 30
num_iters_per_epoch: 10000
keep_nbest_models: 30
keep_nbest_models: 5
# optimization related
accum_grad: 1
egs/callhome/TOLD/soap/conf/EAND_ResNet34_SAN_L4N512_None_FFN_FSMN_L6N512_bce_dia_loss_01_phase3.yaml
@@ -1,3 +1,4 @@
init: xavier_uniform
model: sond
model_conf:
    lsm_weight: 0.0
@@ -96,7 +97,7 @@
# 6 samples
batch_size: 6
num_workers: 8
max_epoch: 12
max_epoch: 10
num_iters_per_epoch: 300
keep_nbest_models: 5
egs/callhome/TOLD/soap/run.sh
@@ -8,7 +8,7 @@
# [2] Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis, EMNLP 2022
# We recommend you run this script stage by stage.
# [developing] This recipe includes:
# This recipe includes:
# 1. simulating data with switchboard and NIST.
# 2. training the model from scratch for 3 stages:
#   2-1. pre-train on simu_swbd_sre
@@ -18,6 +18,7 @@
# Finally, you will get a similar DER result claimed in the paper.
# environment configuration
# path/to/kaldi
kaldi_root=
if [ -z "${kaldi_root}" ]; then
@@ -34,21 +35,35 @@
  ln -s ${kaldi_root}/egs/callhome_diarization/v2/utils ./utils
fi
# path to Switchboard and NIST including:
# LDC98S75, LDC99S79, LDC2002S06, LDC2001S13, LDC2004S07
data_root=
if [ -z "${data_root}" ]; then
  echo "We need Switchboard and NIST to simulate data for pretraining."
  echo "If you can't get them, please use 'finetune.sh' to finetune a pretrained model."
  exit;
fi
# path/to/NIST/LDC2001S97
callhome_root=
if [ -z "${callhome_root}" ]; then
  echo "We need callhome corpus for training."
  echo "If you want inference only, please refer https://www.modelscope.cn/models/damo/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/summary"
  exit;
fi
# machines configuration
gpu_devices="4,5,6,7"  # for V100-16G, use 4 GPUs
gpu_num=4
count=1
# general configuration
stage=3
stop_stage=3
stage=0
stop_stage=19
# number of jobs for data process
nj=16
sr=8000
# dataset related
data_root=
callhome_root=path/to/NIST/LDC2001S97
# experiment configuration
lang=en
@@ -68,16 +83,16 @@
freeze_param=
# inference related
inference_model=valid.der.ave_5best.pth
inference_model=valid.der.ave_5best.pb
inference_config=conf/basic_inference.yaml
inference_tag=""
test_sets="callhome1"
test_sets="callhome2"
gpu_inference=true  # Whether to perform gpu decoding, set false for cpu decoding
# number of jobs for inference
# for gpu decoding, inference_nj=ngpu*njob; for cpu decoding, inference_nj=njob
njob=5
njob=4
infer_cmd=utils/run.pl
told_max_iter=2
told_max_iter=4
. utils/parse_options.sh || exit 1;
@@ -127,6 +142,22 @@
  # 3. Prepare the Callhome portion of NIST SRE 2000.
  local/make_callhome.sh ${callhome_root} ${datadir}/
  # 4. split ref.rttm
  for dset in callhome1 callhome2; do
    rm -rf ${datadir}/${dset}/ref.rttm
    for name in `awk '{print $1}' ${datadir}/${dset}/wav.scp`; do
      grep ${name} ${datadir}/callhome/fullref.rttm >> ${datadir}/${dset}/ref.rttm;
    done
    # filter out records which don't have rttm labels.
    awk '{print $2}' ${datadir}/${dset}/ref.rttm | sort | uniq > ${datadir}/${dset}/uttid
    mv ${datadir}/${dset}/wav.scp ${datadir}/${dset}/wav.scp.bak
    awk '{if (NR==FNR){a[$1]=1}else{if (a[$1]==1){print $0}}}' ${datadir}/${dset}/uttid ${datadir}/${dset}/wav.scp.bak > ${datadir}/${dset}/wav.scp
    mkdir ${datadir}/${dset}/raw
    mv ${datadir}/${dset}/{reco2num_spk,segments,spk2utt,utt2spk,uttid,wav.scp.bak} ${datadir}/${dset}/raw/
    awk '{print $1,$1}' ${datadir}/${dset}/wav.scp > ${datadir}/${dset}/utt2spk
  done
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
@@ -156,10 +187,10 @@
    mkdir -p ${dumpdir}/${dset}/nonoverlap_0s
    python -Wignore script/extract_nonoverlap_segments.py \
      ${datadir}/${dset}/wav.scp ${datadir}/${dset}/ref.rttm ${dumpdir}/${dset}/nonoverlap_0s \
      --min_dur 0 --max_spk_num 8 --sr ${sr} --no_pbar --nj ${nj}
      --min_dur 0.1 --max_spk_num 8 --sr ${sr} --no_pbar --nj ${nj}
    mkdir -p ${datadir}/${dset}/nonoverlap_0s
    find `pwd`/${dumpdir}/${dset}/nonoverlap_0s | sort | awk -F'[/.]' '{print $(NF-1),$0}' > ${datadir}/${dset}/nonoverlap_0s/wav.scp
    find ${dumpdir}/${dset}/nonoverlap_0s/ -iname "*.wav" | sort | awk -F'[/.]' '{print $(NF-1),$0}' > ${datadir}/${dset}/nonoverlap_0s/wav.scp
    awk -F'[/.]' '{print $(NF-1),$(NF-2)}' ${datadir}/${dset}/nonoverlap_0s/wav.scp > ${datadir}/${dset}/nonoverlap_0s/utt2spk
    echo "Done."
  done
@@ -279,11 +310,16 @@
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
  echo "Stage 6: Extract speaker embeddings."
  git lfs install
  git clone https://www.modelscope.cn/damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch.git
  mv speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch ${expdir}/
  sv_exp_dir=exp/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch
  if [ ! -e ${sv_exp_dir} ]; then
    echo "start to download sv models"
    git lfs install
    git clone https://www.modelscope.cn/damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch.git
    mv speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch ${expdir}/
    echo "Done."
  fi
  sed "s/input_size: null/input_size: 80/g" ${sv_exp_dir}/sv.yaml > ${sv_exp_dir}/sv_fbank.yaml
  for dset in swbd_sre/none_silence callhome1/nonoverlap_0s callhome2/nonoverlap_0s; do
    key_file=${datadir}/${dset}/feats.scp
@@ -301,6 +337,7 @@
    ${infer_cmd} --gpu "${_ngpu}" --max-jobs-run "${_nj}" JOB=1:"${_nj}" "${_logdir}"/sv_inference.JOB.log \
      python -m funasr.bin.sv_inference_launch \
        --batch_size 1 \
        --njob ${njob} \
        --ngpu "${_ngpu}" \
        --gpuid_list ${gpuid_list} \
        --data_path_and_name_and_type "${key_file},speech,kaldi_ark" \
@@ -321,7 +358,7 @@
    python -Wignore script/calc_real_meeting_frame_labels.py \
          ${datadir}/${dset} ${dumpdir}/${dset}/labels \
          --n_spk 8 --frame_shift 0.01 --nj 16 --sr 8000
    find `pwd`/${dumpdir}/${dset}/labels -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp
    find `pwd`/${dumpdir}/${dset}/labels/ -iname "*.lbl.mat" | awk -F'[/.]' '{print $(NF-2),$0}' | sort > ${datadir}/${dset}/labels.scp
  done
fi
@@ -362,7 +399,7 @@
  echo "Stage 8: start to dump for callhome1."
  python -Wignore script/dump_meeting_chunks.py --dir ${data_dir} \
    --out ${dumpdir}/callhome1/dumped_files/data --n_spk 16 --no_pbar --sr 8000 --mode test \
    --out ${dumpdir}/callhome1/dumped_files/data --n_spk 16 --no_pbar --sr 8000 --mode train \
    --chunk_size 1600 --chunk_shift 400 --add_mid_to_speaker true
  mkdir -p ${datadir}/callhome1/dumped_files
@@ -507,8 +544,8 @@
    done
fi
# Scoring for pretrained model, you may get a DER like 13.73 16.25
# 13.73: with oracle VAD, 16.25: with only SOND outputs, aka, system VAD.
# Scoring for pretrained model, you may get a DER like 13.29 16.54
# 13.29: with oracle VAD, 16.54: with only SOND outputs, aka, system VAD.
if [ ${stage} -le 12 ] && [ ${stop_stage} -ge 12 ]; then
  echo "stage 12: Scoring phase-1 models"
  if [ ! -e dscore ]; then
@@ -588,7 +625,7 @@
                --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/profile.scp,profile,kaldi_ark \
                --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/label.scp,binary_labels,kaldi_ark \
                --valid_shape_file ${expdir}/${valid_set}_states/speech_shape \
                --init_param exp/${model_dir}/valid.der.ave_5best.pth \
                --init_param exp/${model_dir}/valid.der.ave_5best.pb \
                --unused_parameters true \
                ${init_opt} \
                ${freeze_opt} \
@@ -654,8 +691,8 @@
    done
fi
# Scoring for pretrained model, you may get a DER like 11.25 15.30
# 11.25: with oracle VAD, 15.30: with only SOND outputs, aka, system VAD.
# Scoring for pretrained model, you may get a DER like 11.54 15.41
# 11.54: with oracle VAD, 15.41: with only SOND outputs, aka, system VAD.
if [ ${stage} -le 15 ] && [ ${stop_stage} -ge 15 ]; then
  echo "stage 15: Scoring phase-2 models"
  if [ ! -e dscore ]; then
@@ -733,7 +770,7 @@
                --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/profile.scp,profile,kaldi_ark \
                --valid_data_path_and_name_and_type ${datadir}/${valid_set}/dumped_files/label.scp,binary_labels,kaldi_ark \
                --valid_shape_file ${expdir}/${valid_set}_states/speech_shape \
                --init_param exp/${model_dir}_phase2/valid.forward_steps.ave_5best.pth \
                --init_param exp/${model_dir}_phase2/valid.forward_steps.ave_5best.pb \
                --unused_parameters true \
                ${init_opt} \
                ${freeze_opt} \
funasr/runtime/docs/SDK_advanced_guide_online_zh.md
@@ -28,6 +28,8 @@
  --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx  \
  --online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx  \
  --punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx > log.out 2>&1 &
# 如果想关闭ssl,增加参数:--certfile "" --keyfile ""
```
服务端详细参数介绍可参考[服务端参数介绍](#服务端参数介绍)
### 客户端测试与使用
funasr/runtime/run_server.sh
@@ -12,6 +12,18 @@
. ../../egs/aishell/transformer/utils/parse_options.sh || exit 1;
cd /workspace/FunASR/funasr/runtime/websocket/build/bin
if [ -z "$certfile" ] || [ "$certfile" -eq 0 ]; then
./funasr-wss-server  \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
  --vad-dir ${vad_dir} \
  --punc-dir ${punc_dir} \
  --decoder-thread-num ${decoder_thread_num} \
  --io-thread-num  ${io_thread_num} \
  --port ${port} \
  --certfile  "" \
  --keyfile ""
else
./funasr-wss-server  \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
@@ -22,4 +34,4 @@
  --port ${port} \
  --certfile  ${certfile} \
  --keyfile ${keyfile}
fi
funasr/runtime/run_server_2pass.sh
@@ -13,6 +13,19 @@
. ../../egs/aishell/transformer/utils/parse_options.sh || exit 1;
cd /workspace/FunASR/funasr/runtime/websocket/build/bin
if [ -z "$certfile" ] || [ "$certfile" -eq 0 ]; then
./funasr-wss-server-2pass  \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
  --online-model-dir ${online_model_dir} \
  --vad-dir ${vad_dir} \
  --punc-dir ${punc_dir} \
  --decoder-thread-num ${decoder_thread_num} \
  --io-thread-num  ${io_thread_num} \
  --port ${port} \
  --certfile  "" \
  --keyfile ""
else
./funasr-wss-server-2pass  \
  --download-model-dir ${download_model_dir} \
  --model-dir ${model_dir} \
@@ -24,4 +37,4 @@
  --port ${port} \
  --certfile  ${certfile} \
  --keyfile ${keyfile}
fi
funasr/runtime/wss-client/FunASRClient_CShape.sln
New file
@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.6.33829.357
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FunASRWSClient_Offline", "FunASRWSClient_Offline\FunASRWSClient_Offline.csproj", "{E0986CC4-D443-44E2-96E8-F6E4B691CA57}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FunASRWSClient_Online", "FunASRWSClient_Online\FunASRWSClient_Online.csproj", "{11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}"
EndProject
Global
    GlobalSection(SolutionConfigurationPlatforms) = preSolution
        Debug|Any CPU = Debug|Any CPU
        Release|Any CPU = Release|Any CPU
    EndGlobalSection
    GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {E0986CC4-D443-44E2-96E8-F6E4B691CA57}.Release|Any CPU.Build.0 = Release|Any CPU
        {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
        {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Debug|Any CPU.Build.0 = Debug|Any CPU
        {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Release|Any CPU.ActiveCfg = Release|Any CPU
        {11E80B4F-A838-4DFB-A0C8-9BAE6726BAC0}.Release|Any CPU.Build.0 = Release|Any CPU
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
    EndGlobalSection
    GlobalSection(ExtensibilityGlobals) = postSolution
        SolutionGuid = {E8483245-31D3-4C42-AAF9-B3195EAB97C2}
    EndGlobalSection
EndGlobal
funasr/runtime/wss-client/FunASRClient_CShape.suo
Binary files differ
funasr/runtime/wss-client/FunASRWSClient_Offline/FunASRWSClient_Offline.csproj
New file
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="Websocket.Client" Version="4.6.1" />
  </ItemGroup>
</Project>
funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs
New file
@@ -0,0 +1,85 @@
using System.Collections.Specialized;
using WebSocketSpace;
namespace FunASRWSClient_Offline
{
    /// <summary>
    /// /主程序入口
    /// </summary>
    public class Program
    {
        private static void Main()
        {
            WSClient_Offline m_funasrclient = new WSClient_Offline();
            m_funasrclient.FunASR_Main();
        }
    }
    public class WSClient_Offline
    {
        public static string host = "0.0.0.0";
        public static string port = "10095";
        private static CWebSocketClient m_websocketclient = new CWebSocketClient();
        [STAThread]
        public async void FunASR_Main()
        {
            loadconfig();
            //初始化通信连接
            string errorStatus = string.Empty;
            string commstatus = ClientConnTest();
            if (commstatus != "通信连接成功")
                errorStatus = commstatus;
            //程序初始监测异常--报错、退出
            if (errorStatus != string.Empty)
            {
                //报错方式待加
                Environment.Exit(0);
            }
            //循环输入推理文件
            while (true)
            {
                Console.WriteLine("请输入转录文件路径:");
                string filepath = Console.ReadLine();
                if (filepath != string.Empty && filepath != null)
                {
                     await m_websocketclient.ClientSendFileFunc(filepath);
                }
            }
        }
        private void loadconfig()
        {
            string filePath = "config.ini";
            NameValueCollection settings = new NameValueCollection();
            using (StreamReader reader = new StreamReader(filePath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // 忽略空行和注释
                    if (string.IsNullOrEmpty(line) || line.StartsWith(";") || line.StartsWith("#"))
                        continue;
                    // 解析键值对
                    int equalsIndex = line.IndexOf('=');
                    if (equalsIndex > 0)
                    {
                        string key = line.Substring(0, equalsIndex).Trim();
                        string value = line.Substring(equalsIndex + 1).Trim();
                        if (key == "host")
                            host = value;
                        else if (key == "port")
                            port = value;
                    }
                }
            }
        }
        private static string ClientConnTest()
        {
            //WebSocket连接状态监测
            Task<string> websocketstatus = m_websocketclient.ClientConnTest();
            if (websocketstatus != null && websocketstatus.Result.IndexOf("成功") == -1)
                return websocketstatus.Result;
            return "通信连接成功";
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Offline/README.md
New file
@@ -0,0 +1,9 @@
# cshape-client-offline
这是一个基于FunASR-Websocket服务器的CShape客户端,用于转录本地音频文件。
将配置文件放在与程序相同目录下的config文件夹中,并在config.ini中配置服务器ip地址和端口号。
配置好服务端ip和端口号,在vs中打开需添加Websocket.Client的Nuget程序包后,可直接进行测试,按照控制台提示操作即可。
注:本客户端暂支持wav文件,在win11下完成测试,编译环境VS2022。
funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs
New file
@@ -0,0 +1,120 @@
using Websocket.Client;
using System.Text.Json;
using System.Reactive.Linq;
using FunASRWSClient_Offline;

namespace WebSocketSpace
{
    /// <summary>
    /// Thin websocket wrapper for the FunASR offline server: connects, streams a
    /// WAV file's payload in chunks, and prints transcription results received back.
    /// </summary>
    internal class CWebSocketClient
    {
        // Endpoint built from the values WSClient_Offline loaded out of config.ini.
        private static readonly Uri serverUri = new Uri($"ws://{WSClient_Offline.host}:{WSClient_Offline.port}");
        private static WebsocketClient client = new WebsocketClient(serverUri);

        /// <summary>
        /// Opens the websocket and wires up logging/receive handlers. Returns
        /// "WebSocket通信连接成功" on success, "WebSocket通信连接失败" otherwise.
        /// </summary>
        public async Task<string> ClientConnTest()
        {
            string commstatus = "WebSocket通信连接失败";
            try
            {
                client.Name = "funasr";
                client.ReconnectTimeout = null; // never auto-reconnect on idle
                client.ReconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Reconnection happened, type: {info.Type}, url: {client.Url}"));
                client.DisconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Disconnection happened, type: {info.Type}"));

                // Every text frame from the server is treated as a JSON result message.
                client
                    .MessageReceived
                    .Where(msg => msg.Text != null)
                    .Subscribe(msg =>
                    {
                        recmessage(msg.Text);
                    });

                await client.Start();
                if (client.IsRunning)
                    commstatus = "WebSocket通信连接成功";
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                client.Dispose();
            }
            return commstatus;
        }

        /// <summary>
        /// Sends one audio file for offline transcription: a JSON header frame
        /// first, then the raw samples in chunks (see showWAVForm). The unused
        /// ManualResetEvent and "path" locals from the original were dropped.
        /// </summary>
        public async Task<Task> ClientSendFileFunc(string file_name)
        {
            try
            {
                if (client.IsRunning)
                {
                    string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true}}", Path.GetFileName(file_name));
                    client.Send(firstbuff);
                    showWAVForm(client, file_name);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
            return Task.CompletedTask;
        }

        /// <summary>
        /// Parses a server result frame and prints the recognized text.
        /// The original also read the unused "mode" property; because a missing key
        /// throws KeyNotFoundException (not JsonException), that read could crash the
        /// receive pipeline, so it was removed.
        /// </summary>
        public void recmessage(string message)
        {
            if (message != null)
            {
                try
                {
                    JsonDocument jsonDoc = JsonDocument.Parse(message);
                    JsonElement root = jsonDoc.RootElement;
                    string? text = root.GetProperty("text").GetString();
                    string? name = root.GetProperty("wav_name").GetString();
                    if (name == "asr_stream")
                        Console.WriteLine($"实时识别内容: {text}");
                    else
                        Console.WriteLine($"文件名称:{name} 文件转录内容: {text}");
                }
                catch (JsonException ex)
                {
                    Console.WriteLine("JSON 解析错误: " + ex.Message);
                }
            }
        }

        /// <summary>
        /// Streams the file's PCM payload (44-byte WAV header stripped) in 1024000-byte
        /// chunks, then signals end-of-speech. Previously a failed read returned null
        /// from FileToByte and crashed here with a NullReferenceException; now it is
        /// reported and skipped.
        /// </summary>
        private void showWAVForm(WebsocketClient client, string file_name)
        {
            byte[]? data = FileToByte(file_name);
            if (data == null || data.Length <= 44)
            {
                Console.WriteLine("无法读取文件: " + file_name);
                return;
            }
            byte[] getbyte = data.Skip(44).ToArray();
            for (int i = 0; i < getbyte.Length; i += 1024000)
            {
                byte[] send = getbyte.Skip(i).Take(1024000).ToArray();
                client.Send(send);
                Thread.Sleep(5); // pace the sends slightly
            }
            Thread.Sleep(10);
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Reads a file fully into memory; returns null when it cannot be read.
        /// Uses File.ReadAllBytes: the original's single fs.Read call is not
        /// guaranteed to fill the buffer for large files.
        /// </summary>
        public byte[]? FileToByte(string fileUrl)
        {
            try
            {
                return File.ReadAllBytes(fileUrl);
            }
            catch
            {
                return null;
            }
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/FunASRWSClient_Online.csproj
New file
@@ -0,0 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="NAudio" Version="2.1.0" />
    <PackageReference Include="Websocket.Client" Version="4.6.1" />
  </ItemGroup>
</Project>
funasr/runtime/wss-client/FunASRWSClient_Online/Program.cs
New file
@@ -0,0 +1,255 @@
using AliFsmnVadSharp;
using NAudio.Wave;
using System.Collections.Concurrent;
using WebSocketSpace;
using NAudio.CoreAudioApi;
using System.IO;
using System.Collections.Specialized;
namespace FunASRWSClient_Online
{
    /// <summary>
    /// /主程序入口
    /// </summary>
    public class Program
    {
        private static void Main()
        {
            WSClient_Online m_funasrclient = new WSClient_Online();
            m_funasrclient.FunASR_Main();
        }
    }
    /// <summary>
    /// /主线程入口,初始化后读取数据
    /// </summary>
    public class WSClient_Online
    {
        /// <summary>
        /// FunASR客户端软件运行状态
        /// </summary>
        ///
        public static string host = "0.0.0.0";
        public static string port = "10095";
        public static string onlineasrmode = string.Empty;
        private static WaveCollect m_wavecollect = new WaveCollect();
        private static CWebSocketClient m_websocketclient = new CWebSocketClient();
        public static readonly ConcurrentQueue<byte[]> ActiveAudioSet = new ConcurrentQueue<byte[]>();
        public static readonly ConcurrentQueue<string> AudioFileQueue = new ConcurrentQueue<string>();
        [STAThread]
        public void FunASR_Main()
        {
            loadconfig();
            //麦克风状态监测
            string errorStatus = string.Empty;
            if (GetCurrentMicVolume() == -2)
                errorStatus = "注意:麦克风被设置为静音!";
            else if (GetCurrentMicVolume() == -1)
                errorStatus = "注意:麦克风未连接!";
            else if (GetCurrentMicVolume() == 0)
                errorStatus = "注意:麦克风声音设置为0!";
            //初始化通信连接
            string commstatus = ClientConnTest();
            if (commstatus != "通信连接成功")
                errorStatus = commstatus;
            //程序初始监测异常--报错、退出
            if (errorStatus != string.Empty)
            {
                Environment.Exit(0);//报错方式待加
            }
            //启动客户端向服务端发送音频数据线程
            Thread SendAudioThread = new Thread(SendAudioToSeverAsync);
            SendAudioThread.Start();
            //启动音频文件转录线程
            Thread AudioFileThread = new Thread(SendAudioFileToSeverAsync);
            AudioFileThread.Start();
            while (true)
            {
                Console.WriteLine("请选择语音识别方式:1.离线文件转写;2.实时语音识别");
                string str = Console.ReadLine();
                if (str != string.Empty)
                {
                    if (str == "1")//离线文件转写
                    {
                        onlineasrmode = "offline";
                        Console.WriteLine("请输入转录文件路径");
                        str = Console.ReadLine();
                        if (!string.IsNullOrEmpty(str))
                            AudioFileQueue.Enqueue(str);
                    }
                    else if (str == "2")//实时语音识别
                    {
                        Console.WriteLine("请输入实时语音识别模式:1.online;2.2pass");
                        str = Console.ReadLine();
                        OnlineASR(str);
                    }
                }
            }
        }
        private void loadconfig()
        {
            string filePath = "config.ini";
            NameValueCollection settings = new NameValueCollection();
            using (StreamReader reader = new StreamReader(filePath))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    // 忽略空行和注释
                    if (string.IsNullOrEmpty(line) || line.StartsWith(";") || line.StartsWith("#"))
                        continue;
                    // 解析键值对
                    int equalsIndex = line.IndexOf('=');
                    if (equalsIndex > 0)
                    {
                        string key = line.Substring(0, equalsIndex).Trim();
                        string value = line.Substring(equalsIndex + 1).Trim();
                        if (key == "host")
                            host = value;
                        else if (key == "port")
                            port = value;
                    }
                }
            }
        }
        private void OnlineASR(string str)
        {
            if (!string.IsNullOrEmpty(str))
            {
                if (str == "1")//实时语音识别
                    onlineasrmode = "online";
                else if (str == "2")//实时语音识别-动态修正
                    onlineasrmode = "2pass";
            }
            //开始录制声音、发送识别
            if (onlineasrmode != string.Empty)
            {
                m_wavecollect.StartRec();
                m_websocketclient.ClientFirstConnOnline(onlineasrmode);
                try
                {
                    while (true)
                    {
                        if (!WaveCollect.voicebuff.IsEmpty)
                        {
                            byte[] buff;
                            int buffcnt = WaveCollect.voicebuff.Count;
                            WaveCollect.voicebuff.TryDequeue(out buff);
                            if (buff != null)
                                ActiveAudioSet.Enqueue(buff);
                        }
                        else
                        {
                            if (Console.KeyAvailable)
                            {
                                var key = Console.ReadKey(true);
                                // 检测到按下Ctrl+C
                                if ((key.Modifiers & ConsoleModifiers.Control) != 0 && key.Key == ConsoleKey.C)
                                {
                                    // 执行相应的操作
                                    Console.WriteLine("Ctrl+C Pressed!");
                                    // 退出循环或执行其他操作
                                    break;
                                }
                            }
                            else
                            {
                                Thread.Sleep(10);
                            }
                        }
                    }
                }
                catch
                {
                    Console.WriteLine("实时识别出现异常!");
                }
                finally
                {
                    m_wavecollect.StopRec();
                    m_websocketclient.ClientLastConnOnline();
                }
            }
        }
        private string ClientConnTest()
        {
            //WebSocket连接状态监测
            Task<string> websocketstatus = m_websocketclient.ClientConnTest();
            if (websocketstatus != null && websocketstatus.Result.IndexOf("成功") == -1)
                return websocketstatus.Result;
            return "通信连接成功";
        }
        private void SendAudioFileToSeverAsync()
        {
            while (true)
            {
                Thread.Sleep(1000);
                if (AudioFileQueue.Count > 0)
                {
                    string filepath = string.Empty;
                    AudioFileQueue.TryDequeue(out filepath);
                    if (filepath != string.Empty && filepath != null)
                    {
                        m_websocketclient.ClientSendFileFunc(filepath);
                    }
                }
                else
                {
                    Thread.Sleep(100);
                }
            }
        }
        private void SendAudioToSeverAsync()
        {
            while (true)
            {
                if (ActiveAudioSet.Count > 0)
                {
                    byte[] audio;
                    ActiveAudioSet.TryDequeue(out audio);
                    if (audio == null)
                        continue;
                    byte[] mArray = new byte[audio.Length];
                    Array.Copy(audio, 0, mArray, 0, audio.Length);
                    if (mArray != null)
                        m_websocketclient.ClientSendAudioFunc(mArray);
                }
                else
                {
                    Thread.Sleep(10);
                }
            }
        }
        private void SaveAsWav(byte[] pcmData, string fileName, int sampleRate, int bitsPerSample, int channels)
        {
            using (var writer = new WaveFileWriter(fileName, new WaveFormat(sampleRate, bitsPerSample, channels)))
            {
                writer.Write(pcmData, 0, pcmData.Length);
            }
        }
        private int GetCurrentMicVolume()                       //获取麦克风设置
        {
            int volume = -1;
            var enumerator = new MMDeviceEnumerator();
            //获取音频输入设备
            IEnumerable<MMDevice> captureDevices = enumerator.EnumerateAudioEndPoints(DataFlow.Capture, DeviceState.Active).ToArray();
            if (captureDevices.Count() > 0)
            {
                MMDevice mMDevice = captureDevices.ToList()[0];
                if (mMDevice.AudioEndpointVolume.Mute)
                    return -2;
                volume = (int)(mMDevice.AudioEndpointVolume.MasterVolumeLevelScalar * 100);
            }
            return volume;
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/README.md
New file
@@ -0,0 +1,9 @@
# cshape-client-online
这是一个基于FunASR-Websocket服务器的C#客户端,用于实时语音识别和转录本地音频文件。
将配置文件放在与程序相同目录下的config文件夹中,并在config.ini中配置服务器ip地址和端口号。
配置好服务端ip和端口号后,在VS中打开工程并添加NAudio和Websocket.Client的NuGet程序包,即可直接进行测试,按照控制台提示操作即可。
注:实时语音识别使用online或2pass,转录文件默认使用offline,在win11下完成测试,编译环境VS2022。
funasr/runtime/wss-client/FunASRWSClient_Online/WaveCollect.cs
New file
@@ -0,0 +1,106 @@
using System.Collections.Concurrent;
using NAudio.Wave;
using NAudio.CoreAudioApi;
namespace AliFsmnVadSharp
{
    /// <summary>
    /// Records microphone audio via NAudio (16 kHz / 16 bit / mono) and hands the
    /// captured PCM chunks to consumers through the static <see cref="voicebuff"/>
    /// queue. A tmp.wav WaveFileWriter is also opened next to the executable
    /// (actual writing to it is currently disabled).
    /// </summary>
    class WaveCollect
    {
        private string fileName = string.Empty;
        private WaveInEvent? waveSource = null;
        private WaveFileWriter? waveFile = null;
        // Capture settings: one DataAvailable callback roughly every 600 ms.
        public static int wave_buffer_milliseconds = 600;
        public static int wave_buffer_collectbits = 16;
        public static int wave_buffer_collectchannels = 1;
        public static int wave_buffer_collectfrequency = 16000;
        // Producer/consumer hand-off between the NAudio callback thread and the
        // sender loop(s).
        public static readonly ConcurrentQueue<byte[]> voicebuff = new ConcurrentQueue<byte[]>();

        /// <summary>
        /// Starts recording: prints the active capture devices for diagnostics,
        /// clears any stale queued audio, then begins capturing into
        /// <see cref="voicebuff"/>.
        /// </summary>
        public void StartRec()
        {
            // Enumerate the capture devices (diagnostic output only).
            var captureDevices = new MMDeviceEnumerator().EnumerateAudioEndPoints(DataFlow.Capture, DeviceState.Active);
            foreach (var device in captureDevices)
            {
                Console.WriteLine("Device Name: " + device.FriendlyName);
                // NOTE(review): WasapiLoopbackCapture is intended for render
                // (output) devices; here it is only used to read and print the
                // device's mix format — confirm this works on capture endpoints.
                using (var capture = new WasapiLoopbackCapture(device))
                {
                    Console.WriteLine("Device Channels:" + capture.WaveFormat.Channels);
                    Console.WriteLine("Device SampleRate:" + capture.WaveFormat.SampleRate);
                    Console.WriteLine("Device BitsPerSample:" + capture.WaveFormat.BitsPerSample);
                }
            }
            // Drop chunks left over from a previous recording session.
            while (voicebuff.TryDequeue(out _)) { }
            waveSource = new WaveInEvent();
            waveSource.BufferMilliseconds = wave_buffer_milliseconds;
            waveSource.WaveFormat = new WaveFormat(wave_buffer_collectfrequency, wave_buffer_collectbits, wave_buffer_collectchannels); // 16 bit, 16 kHz, mono
            waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(waveSource_DataAvailable);
            // BUG FIX: the stop handler existed but was never subscribed, so the
            // device/file were only released when StopRec() was called explicitly.
            waveSource.RecordingStopped += new EventHandler<StoppedEventArgs>(waveSource_RecordingStopped);
            SetFileName(AppDomain.CurrentDomain.BaseDirectory + "tmp.wav");
            waveFile = new WaveFileWriter(fileName, waveSource.WaveFormat);
            waveSource.StartRecording();
        }

        /// <summary>
        /// Stops recording and releases the capture device and the WAV file.
        /// </summary>
        public void StopRec()
        {
            if (waveSource != null)
            {
                waveSource.StopRecording();
                waveSource.Dispose();
                waveSource = null;
            }
            // BUG FIX: dispose the file even when waveSource was already null —
            // the old code leaked waveFile in that case.
            if (waveFile != null)
            {
                waveFile.Dispose();
                waveFile = null;
            }
        }

        /// <summary>Sets the path used for the temporary WAV file.</summary>
        public void SetFileName(string fileName)
        {
            this.fileName = fileName;
        }

        // Runs on NAudio's callback thread each time a capture buffer fills.
        private void waveSource_DataAvailable(object sender, WaveInEventArgs e)
        {
            if (waveFile != null && e.Buffer != null && e.BytesRecorded > 0)
            {
                // BUG FIX: NAudio reuses e.Buffer between callbacks and it may be
                // larger than the amount actually recorded. Enqueueing e.Buffer
                // directly let later callbacks overwrite chunks that were still
                // queued. Copy exactly BytesRecorded bytes first.
                byte[] chunk = new byte[e.BytesRecorded];
                Array.Copy(e.Buffer, 0, chunk, 0, e.BytesRecorded);
                voicebuff.Enqueue(chunk);
                //waveFile.Write(chunk, 0, chunk.Length);
                waveFile.Flush();
            }
        }

        /// <summary>
        /// Pops one recorded chunk, or returns null when the queue is empty.
        /// </summary>
        public static byte[] Wavedata_Dequeue()
        {
            voicebuff.TryDequeue(out byte[] datas);
            return datas;
        }

        // Final cleanup when the driver reports that recording has stopped.
        private void waveSource_RecordingStopped(object sender, StoppedEventArgs e)
        {
            if (waveSource != null)
            {
                waveSource.Dispose();
                waveSource = null;
            }
            if (waveFile != null)
            {
                waveFile.Dispose();
                waveFile = null;
            }
        }
    }
}
funasr/runtime/wss-client/FunASRWSClient_Online/WebScoketClient.cs
New file
@@ -0,0 +1,219 @@
using System.Net.WebSockets;
using Websocket.Client;
using System.Text.Json;
using NAudio.Wave;
using AliFsmnVadSharp;
using System.Reactive.Linq;
using FunASRWSClient_Online;
namespace WebSocketSpace
{
    /// <summary>
    /// WebSocket client for the FunASR server: streams microphone audio for
    /// realtime recognition and uploads whole files for offline transcription,
    /// printing recognized text to the console as results arrive.
    /// </summary>
    internal class CWebSocketClient
    {
        // Streaming parameters matching the FunASR websocket protocol.
        private static int chunk_interval = 10;
        private static int[] chunk_size = new int[] { 5, 10, 5 };
        private static readonly Uri serverUri = new Uri($"ws://{WSClient_Online.host}:{WSClient_Online.port}"); // WebSocket server address
        private static WebsocketClient client = new WebsocketClient(serverUri);

        /// <summary>
        /// Opens the connection and wires up the reconnect / disconnect / message
        /// handlers. Returns a human-readable (Chinese) status string.
        /// </summary>
        public async Task<string> ClientConnTest()
        {
            string commstatus = "WebSocket通信连接失败";
            try
            {
                client.Name = "funasr";
                client.ReconnectTimeout = null; // never reconnect due to inactivity alone
                client.ReconnectionHappened.Subscribe(info =>
                   Console.WriteLine($"Reconnection happened, type: {info.Type}, url: {client.Url}"));
                client.DisconnectionHappened.Subscribe(info =>
                    Console.WriteLine($"Disconnection happened, type: {info.Type}"));
                client
                    .MessageReceived
                    .Where(msg => msg.Text != null)
                    .Subscribe(msg =>
                {
                    rec_message(msg.Text, client);
                });
                await client.Start();
                if (client.IsRunning)
                    commstatus = "WebSocket通信连接成功";
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                client.Dispose();
            }
            return commstatus;
        }

        /// <summary>
        /// Sends the handshake JSON that opens a realtime (online/2pass) session.
        /// Returns false (and triggers a reconnect) when the socket is down.
        /// </summary>
        public bool ClientFirstConnOnline(string asrmode)
        {
            if (!client.IsRunning)
            {
                client.Reconnect();
                return false;
            }
            string firstbuff = string.Format("{{\"mode\": \"{0}\", \"chunk_size\": [{1},{2},{3}], \"chunk_interval\": {4}, \"wav_name\": \"microphone\", \"is_speaking\": true}}"
                   , asrmode, chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval);
            // Websocket.Client's Send enqueues synchronously; the old
            // fire-and-forget Task.Run added nothing but ordering risk.
            client.Send(firstbuff);
            return true;
        }

        /// <summary>
        /// Streams one recorded buffer to the server in protocol-sized chunks
        /// (realtime recognition). Returns false when the socket is down.
        /// </summary>
        public bool ClientSendAudioFunc(byte[] buff)
        {
            if (!client.IsRunning)
            {
                client.Reconnect();
                return false;
            }
            // Bytes per protocol chunk; with the defaults this is
            // 16000/1000 * 60 * 10 / 10 = 960 bytes per send.
            int CHUNK = WaveCollect.wave_buffer_collectfrequency / 1000 * 60 * chunk_size[1] / chunk_interval;
            for (int i = 0; i < buff.Length; i += CHUNK)
            {
                byte[] send = buff.Skip(i).Take(CHUNK).ToArray();
                // Direct Send keeps the chunks in order (it enqueues internally);
                // Task.Run per chunk could deliver them out of sequence.
                client.Send(send);
                Thread.Sleep(1); // light pacing between chunks
            }
            return true;
        }

        /// <summary>Signals end-of-utterance for the realtime session.</summary>
        public void ClientLastConnOnline()
        {
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Transcribes a local audio file.
        /// Returns 0 on success, -1 for an unsupported file type, -2 when the
        /// connection is down (a reconnect is triggered).
        /// </summary>
        public int ClientSendFileFunc(string file_name)
        {
            string fileExtension = Path.GetExtension(file_name);
            fileExtension = fileExtension.Replace(".", "");
            if (!(fileExtension == "mp3" || fileExtension == "mp4" || fileExtension == "wav" || fileExtension == "pcm"))
                return -1;
            if (!client.IsRunning)
            {
                client.Reconnect();
                return -2;
            }
            if (fileExtension == "wav" || fileExtension == "pcm")
            {
                // BUG FIX: the mode string was "office"; the server only
                // understands "offline" (the mp3/mp4 branch already used it).
                string firstbuff = string.Format("{{\"mode\": \"offline\", \"chunk_size\": [{0},{1},{2}], \"chunk_interval\": {3}, \"wav_name\": \"{4}\", \"is_speaking\": true, \"wav_format\":\"pcm\"}}"
                    , chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval, Path.GetFileName(file_name));
                client.Send(firstbuff);
                if (fileExtension == "wav")
                    showWAVForm(file_name);      // strip the RIFF header, send raw PCM
                else
                    showWAVForm_All(file_name);  // pcm files are already headerless
            }
            else // mp3 / mp4
            {
                // NOTE(review): this branch sends chunk_size as a quoted string
                // while the pcm branch sends a JSON array — confirm the server
                // accepts both encodings.
                string firstbuff = string.Format("{{\"mode\": \"offline\", \"chunk_size\": \"{0},{1},{2}\", \"chunk_interval\": {3}, \"wav_name\": \"{4}\", \"is_speaking\": true, \"wav_format\":\"{5}\"}}"
                    , chunk_size[0], chunk_size[1], chunk_size[2], chunk_interval, Path.GetFileName(file_name), fileExtension);
                client.Send(firstbuff);
                showWAVForm_All(file_name);
            }
            return 0;
        }

        private string recbuff = string.Empty;    // accumulated finalized (offline) text
        private string onlinebuff = string.Empty; // accumulated partial (online) text

        /// <summary>
        /// Handles one JSON result message from the server and prints the
        /// accumulated recognition text.
        /// </summary>
        public void rec_message(string message, WebsocketClient client)
        {
            if (message == null)
                return;
            try
            {
                // JsonDocument is IDisposable — the old code leaked it.
                using JsonDocument jsonDoc = JsonDocument.Parse(message);
                JsonElement root = jsonDoc.RootElement;
                // TryGetProperty everywhere: GetProperty throws
                // KeyNotFoundException (not a JsonException) for absent keys,
                // which the catch below would have missed. The old wav_name probe
                // (IndexOf("wav_name  ") with two trailing spaces) never matched.
                string mode = root.TryGetProperty("mode", out JsonElement modeEl) ? modeEl.GetString() : string.Empty;
                string text = root.TryGetProperty("text", out JsonElement textEl) ? textEl.GetString() : string.Empty;
                bool isfinal = root.TryGetProperty("is_final", out JsonElement finalEl) && finalEl.GetBoolean();
                string name = root.TryGetProperty("wav_name", out JsonElement nameEl) ? nameEl.GetString() : string.Empty;
                //if (name == "microphone")
                //    Console.WriteLine($"实时识别内容: {text}");
                //else
                //    Console.WriteLine($"文件名称:{name} 文件转录内容: {text}");
                if (mode == "2pass-online" && WSClient_Online.onlineasrmode != "offline")
                {
                    // Partial hypothesis: show finalized text plus the live tail.
                    onlinebuff += text;
                    Console.WriteLine(recbuff + onlinebuff);
                }
                else if (mode == "2pass-offline")
                {
                    // Second-pass result replaces the partial tail.
                    recbuff += text;
                    onlinebuff = string.Empty;
                    Console.WriteLine(recbuff);
                }
                if (isfinal && WSClient_Online.onlineasrmode != "offline") // utterance finished
                {
                    recbuff = string.Empty;
                }
            }
            catch (JsonException ex)
            {
                Console.WriteLine("JSON 解析错误: " + ex.Message);
            }
        }

        // Sends a .wav file: skips the 44-byte canonical RIFF header, then streams
        // the PCM payload in 100 KiB pieces and closes the utterance.
        // NOTE(review): assumes a canonical 44-byte header — wav files with extra
        // chunks would leak header bytes into the audio stream.
        private void showWAVForm(string file_name)
        {
            byte[] getbyte = FileToByte(file_name).Skip(44).ToArray();
            for (int i = 0; i < getbyte.Length; i += 102400)
            {
                byte[] send = getbyte.Skip(i).Take(102400).ToArray();
                client.Send(send);
                Thread.Sleep(5);
            }
            Thread.Sleep(100);
            client.Send("{\"is_speaking\": false}");
        }

        // Sends a file verbatim (pcm/mp3/mp4) in 1000 KiB pieces and closes the
        // utterance.
        private void showWAVForm_All(string file_name)
        {
            byte[] getbyte = FileToByte(file_name);
            for (int i = 0; i < getbyte.Length; i += 1024000)
            {
                byte[] send = getbyte.Skip(i).Take(1024000).ToArray();
                client.Send(send);
                Thread.Sleep(5);
            }
            Thread.Sleep(10);
            client.Send("{\"is_speaking\": false}");
        }

        /// <summary>
        /// Reads a file fully into memory; returns null on any I/O error.
        /// </summary>
        public byte[] FileToByte(string fileUrl)
        {
            try
            {
                // File.ReadAllBytes loops until EOF; the old single Stream.Read
                // call could legally return fewer bytes than requested.
                return File.ReadAllBytes(fileUrl);
            }
            catch
            {
                return null;
            }
        }
    }
}
funasr/runtime/wss-client/confg/config.ini
New file
@@ -0,0 +1,2 @@
host=127.0.0.1
port=10095
funasr/runtime/wss-client/confg/tmp.wav
Binary files differ