python/FunASR-XL.git

parent: ba0325c0 | 补丁 | 提交 | ignore whitespace

fix c# demo project to new onnx model files (#1689)

Dogvane Huang

2024-07-02 dfcc5d47587d3e793cbfec2e9509c0e9a9e1732c

fix c# demo project to new onnx model files (#1689)

9个文件已修改

	runtime/csharp/AliFsmnVad/AliFsmnVadSharp.Examples/AliFsmnVadSharp.Examples.csproj	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliFsmnVad/AliFsmnVadSharp.Examples/Program.cs	140 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliFsmnVad/AliFsmnVadSharp/AliFsmnVad.cs	6 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliFsmnVad/AliFsmnVadSharp/Model/VadYamlEntity.cs	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliParaformerAsr/AliParaformerAsr.Examples/AliParaformerAsr.Examples.csproj	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliParaformerAsr/AliParaformerAsr.Examples/Program.cs	140 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj	4 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs	83 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/YamlHelper.cs	13 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 runtime/csharp/AliFsmnVad/AliFsmnVadSharp.Examples/AliFsmnVadSharp.Examples.csproj

@@ -8,6 +8,7 @@
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <PackageReference Include="NAudio" Version="2.1.0" />
  </ItemGroup>


 runtime/csharp/AliFsmnVad/AliFsmnVadSharp.Examples/Program.cs

@@ -1,61 +1,107 @@
using AliFsmnVadSharp;
using AliFsmnVadSharp.Model;
using CommandLine;
using NAudio.Wave;

internal static class Program
{
    [STAThread]
    private static void Main()
    {
        string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
        string modelFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/model.onnx";
        string configFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.yaml";
        string mvnFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.mvn";
        int batchSize = 2;
        TimeSpan start_time0 = new TimeSpan(DateTime.Now.Ticks);
        AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);
        TimeSpan end_time0 = new TimeSpan(DateTime.Now.Ticks);
        double elapsed_milliseconds0 = end_time0.TotalMilliseconds - start_time0.TotalMilliseconds;
        Console.WriteLine("load model and init config elapsed_milliseconds:{0}", elapsed_milliseconds0.ToString());
        List<float[]> samples = new List<float[]>();
        TimeSpan total_duration = new TimeSpan(0L);
        for (int i = 0; i < 2; i++)
        {
            string wavFilePath = string.Format(applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/example/{0}.wav", i.ToString());//vad_example
            if (!File.Exists(wavFilePath))
            {
                continue;
            }
            AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
            byte[] datas = new byte[_audioFileReader.Length];
            _audioFileReader.Read(datas, 0, datas.Length);
            TimeSpan duration = _audioFileReader.TotalTime;
            float[] wavdata = new float[datas.Length / 4];
            Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
            float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
            samples.Add(wavdata);
            total_duration += duration;			
        }
        TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
        //SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
        SegmentEntity[] segments_duration = aliFsmnVad.GetSegmentsByStep(samples);
        TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
        Console.WriteLine("vad infer result:");
        foreach (SegmentEntity segment in segments_duration)
        {
            Console.Write("[");
            foreach (var x in segment.Segment) 
            {
                Console.Write("[" + string.Join(",", x.ToArray()) + "]");
            }
            Console.Write("]\r\n");
        }
    public class ProgramParams
    {
        [Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
        public string WavFilePath { get; set; }

        double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
        [Option('m', "model", Default = "speech_fsmn_vad_zh-cn-16k-common-onnx", HelpText = "Model path.")]
        public string Model { get; set; }
    }

    [STAThread]
    private static void Main(string[] args)
    {
        var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;

        string modelPath = argParams.Model;
        if (!Directory.Exists(argParams.Model))
        {
            modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
            if (!Directory.Exists(modelPath))
            {
                throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
            }
        }

        string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
        string configFilePath = Path.Combine(modelPath, "config.yaml");
        string mvnFilePath = Path.Combine(modelPath, "am.mvn");

        int batchSize = 1;
        AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);

        List<string> wavFiles = new List<string>();

        if (File.Exists(argParams.WavFilePath))
        {
            wavFiles.Add(argParams.WavFilePath);
        }
        else if (Directory.Exists(argParams.WavFilePath))
        {
            foreach (var wavFilePath in Directory.GetFiles(argParams.WavFilePath, "*.wav"))
            {
                wavFiles.Add(wavFilePath);
            }
        }
        else
        {
            throw new Exception($"Invalid wav input path. {argParams.WavFilePath}");
        }

        var start_time = DateTime.Now;

        TimeSpan total_duration = new TimeSpan(0L);
        for (int i = 0; i < wavFiles.Count; i += batchSize)
        {
            List<float[]> samples = new List<float[]>();
            
            foreach(var wavFile in wavFiles.Skip(i).Take(batchSize))
            {
                (var sample, var duration) = LoadWavFile(wavFile);
                samples.Add(sample);
                total_duration += duration;
            }

            SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
            Console.WriteLine("vad infer result:");
            foreach (SegmentEntity segment in segments_duration)
            {
                Console.Write("[");
                foreach (var x in segment.Segment)
                {
                    Console.Write("[" + string.Join(",", x.ToArray()) + "]");
                }
                Console.Write("]\r\n");
            }
        }

        var end_time = DateTime.Now;

        double elapsed_milliseconds = (end_time - start_time).TotalMilliseconds;

        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("rtf:{1}", "0".ToString(), rtf.ToString());
        Console.WriteLine("------------------------");
    }

    private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
    {
        AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
        byte[] datas = new byte[_audioFileReader.Length];
        _audioFileReader.Read(datas, 0, datas.Length);
        var duration = _audioFileReader.TotalTime;
        float[] wavdata = new float[datas.Length / 4];
        Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
        var sample = wavdata.Select((float x) => x * 32768f).ToArray();

        return (sample, duration);
    }
}

 runtime/csharp/AliFsmnVad/AliFsmnVadSharp/AliFsmnVad.cs

@@ -4,6 +4,8 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// 模型文件下载地址：https://modelscope.cn/models/iic/speech_fsmn_vad_zh-cn-16k-common-onnx/

namespace AliFsmnVadSharp
{
    public class AliFsmnVad : IDisposable
@@ -28,9 +30,9 @@
            VadYamlEntity vadYamlEntity = YamlHelper.ReadYaml<VadYamlEntity>(configFilePath);
            _wavFrontend = new WavFrontend(mvnFilePath, vadYamlEntity.frontend_conf);
            _frontend = vadYamlEntity.frontend;
            _vad_post_conf = vadYamlEntity.vad_post_conf;
            _vad_post_conf = vadYamlEntity.model_conf;
            _batchSize = batchSize;
            _max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.vad_post_conf.max_end_silence_time;
            _max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.model_conf.max_end_silence_time;
            _encoderConfEntity = vadYamlEntity.encoder_conf;

            ILoggerFactory loggerFactory = new LoggerFactory();

 runtime/csharp/AliFsmnVad/AliFsmnVadSharp/Model/VadYamlEntity.cs

@@ -22,6 +22,6 @@
        public string encoder { get => _encoder; set => _encoder = value; }
        public FrontendConfEntity frontend_conf { get => _frontend_conf; set => _frontend_conf = value; }
        public EncoderConfEntity encoder_conf { get => _encoder_conf; set => _encoder_conf = value; }
        public VadPostConfEntity vad_post_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
        public VadPostConfEntity model_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
    }
}

 runtime/csharp/AliParaformerAsr/AliParaformerAsr.Examples/AliParaformerAsr.Examples.csproj

@@ -5,6 +5,7 @@
    <TargetFramework>net6.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <Platforms>AnyCPU;x64</Platforms>
  </PropertyGroup>

  <ItemGroup>
@@ -12,6 +13,7 @@
  </ItemGroup>

  <ItemGroup>
    <PackageReference Include="CommandLineParser" Version="2.9.1" />
    <PackageReference Include="NAudio" Version="2.1.0" />
  </ItemGroup>


 runtime/csharp/AliParaformerAsr/AliParaformerAsr.Examples/Program.cs

@@ -1,66 +1,116 @@
using AliParaformerAsr;
using CommandLine;
using NAudio.Wave;

internal static class Program
{
    [STAThread]
    private static void Main()
    {
        string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
        string modelName = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx";
        string modelFilePath = applicationBase + "./"+ modelName + "/model_quant.onnx";
        string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
        string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
        string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
        AliParaformerAsr.OfflineRecognizer offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath);
        List<float[]>? samples = null;
        TimeSpan total_duration = new TimeSpan(0L);
        if (samples == null)

    /// <summary>
    /// Command-line options of the demo, bound from <c>args</c> through the
    /// <c>[Option]</c> attributes on each property.
    /// </summary>
    public class ProgramParams
    {
        /// <summary>
        /// Required option <c>-i</c> / <c>--input</c>: path of a single wav file, or of a
        /// folder whose <c>*.wav</c> files are all processed.
        /// </summary>
        [Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
        public string WavFilePath { get; set; }

        /// <summary>
        /// Option <c>-m</c> / <c>--model</c>: folder that contains the model files. When the
        /// option is omitted the default folder name declared on the attribute is used.
        /// </summary>
        [Option('m', "model", Default = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx", HelpText = "Model path.")]
        public string Model { get; set; }
    }

    [STAThread]
    private static void Main(string[] args)
    {
        var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;

        string modelPath = argParams.Model;
        if (!Directory.Exists(argParams.Model))
        {
            samples = new List<float[]>();
            for (int i = 0; i < 5; i++)
            modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
            if (!Directory.Exists(modelPath))
            {
                string wavFilePath = string.Format(applicationBase + "./" + modelName + "/example/{0}.wav", i.ToString());
                if (!File.Exists(wavFilePath))
                {
                    break;
                }
                AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
                byte[] datas = new byte[_audioFileReader.Length];
                _audioFileReader.Read(datas, 0, datas.Length);
                TimeSpan duration = _audioFileReader.TotalTime;
                float[] wavdata = new float[datas.Length / 4];
                Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
                float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
                samples.Add(sample);
                total_duration += duration;
                throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
            }
        }
        TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
        //1.Non batch method
        foreach (var sample in samples)

        string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
        string configFilePath = Path.Combine(modelPath, "asr.yaml");
        string mvnFilePath = Path.Combine(modelPath, "am.mvn");
        string tokensFilePath = Path.Combine(modelPath, "tokens.json");

        
        var offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath, OnnxRumtimeTypes.CPU);

        List<float[]> samples = new List<float[]>();
        TimeSpan total_duration = new TimeSpan(0L);

        if (File.Exists(argParams.WavFilePath))
        {
            List<float[]> temp_samples = new List<float[]>();
            temp_samples.Add(sample);
            (var sample, var duration) = LoadWavFile(argParams.WavFilePath);

            samples.Add(sample);
            total_duration += duration;
        }
        else if (Directory.Exists(argParams.WavFilePath)) 
        {
            var findWavCount = 0;

            foreach (var wavFilePath in Directory.EnumerateFiles(argParams.WavFilePath, "*.wav"))
            {
                (var sample, var duration) = LoadWavFile(wavFilePath);

                samples.Add(sample);
                total_duration += duration;
                findWavCount++;
            }

            Console.WriteLine($"Total WAV files found: {findWavCount} duration：{total_duration}");
        }
        else
        {
            throw new Exception($"Invalid wav input path. {argParams.WavFilePath}");
        }

        var start_time = DateTime.Now;

        int batchSize = 1; // 输入参数支持批处理，但是实际效果提升有限，感觉还是负优化，等GPU版本优化后再试
        for (int i = 0; i < samples.Count; i += batchSize)
        {
            List<float[]> temp_samples = samples.Skip(i).Take(batchSize).ToList();

            List<string> results = offlineRecognizer.GetResults(temp_samples);

            foreach (string result in results)
            {
                Console.WriteLine(result);
                Console.WriteLine("");
            }
        }
        //2.batch method
        //List<string> results_batch = offlineRecognizer.GetResults(samples);
        //foreach (string result in results_batch)
        //{
        //    Console.WriteLine(result);
        //    Console.WriteLine("");
        //}
        TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
        double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;


        var end_time = DateTime.Now;

        double elapsed_milliseconds = (end_time - start_time).TotalMilliseconds;
        double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;

        Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
        Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
        Console.WriteLine("rtf:{1}", "0".ToString(), rtf.ToString());

        // 实时因子是处理时间与音频时长的比值。
        // 例如，如果一个 10 秒的音频片段需要 5 秒来处理，那么实时因子就是 0.5。
        // 如果处理时间和音频时长相等，那么实时因子就是 1，这意味着系统以实时速度进行处理。 
        // 数值越小，表示处理速度越快。
        // from chatgpt 解释
        Console.WriteLine("Real-Time Factor :{0}", rtf.ToString());
        Console.WriteLine("end!");
    }
}

    private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
    {
        AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
        byte[] datas = new byte[_audioFileReader.Length];
        _audioFileReader.Read(datas, 0, datas.Length);
        var duration = _audioFileReader.TotalTime;
        float[] wavdata = new float[datas.Length / 4];
        Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
        var sample = wavdata.Select((float x) => x * 32768f).ToArray();

        return (sample, duration);
    }
}

 runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj

@@ -9,7 +9,9 @@
  <ItemGroup>
    <PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" />
    <PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.15.1" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" />
    <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
    <PackageReference Include="YamlDotNet" Version="13.1.1" />
  </ItemGroup>


 runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs

@@ -9,9 +9,20 @@
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.Extensions.Logging;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Linq;

// 模型文件地址： https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
namespace AliParaformerAsr
{
    /// <summary>
    /// Selects which ONNX Runtime execution provider the recognizer appends to its
    /// session options.
    /// </summary>
    public enum OnnxRumtimeTypes
    {
        /// <summary>CPU execution provider; also used for any value not listed here.</summary>
        CPU = 0,

        /// <summary>DirectML execution provider (<c>AppendExecutionProvider_DML</c>).</summary>
        DML = 1,

        /// <summary>CUDA execution provider (<c>AppendExecutionProvider_CUDA</c>).</summary>
        CUDA = 2,
    }

    /// <summary>
    /// offline recognizer package
    /// Copyright (c)  2023 by manyeyes
@@ -24,35 +35,70 @@
        private string _frontend;
        private FrontendConfEntity _frontendConfEntity;
        private string[] _tokens;
        private int _batchSize = 1;

        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath,string tokensFilePath, int batchSize = 1,int threadsNum=1)
        /// <summary>
        /// 
        /// </summary>
        /// <param name="modelFilePath"></param>
        /// <param name="configFilePath"></param>
        /// <param name="mvnFilePath"></param>
        /// <param name="tokensFilePath"></param>
        /// <param name="rumtimeType">可以选择gpu，但是目前情况下，不建议使用，因为性能提升有限</param>
        /// <param name="deviceId">设备id，多显卡时用于指定执行的显卡</param>
        /// <param name="batchSize"></param>
        /// <param name="threadsNum"></param>
        /// <exception cref="ArgumentException"></exception>
        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
            var options = new SessionOptions();
            switch(rumtimeType)
            {
                case OnnxRumtimeTypes.DML:
                    options.AppendExecutionProvider_DML(deviceId);
                    break;
                case OnnxRumtimeTypes.CUDA:
                    options.AppendExecutionProvider_CUDA(deviceId);
                    break;
                default:
                    options.AppendExecutionProvider_CPU(deviceId);
                    break;
            }
            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            //options.AppendExecutionProvider_DML(0);
            options.AppendExecutionProvider_CPU(0);
            //options.AppendExecutionProvider_CUDA(0);
            options.InterOpNumThreads = threadsNum;
            _onnxSession = new InferenceSession(modelFilePath, options);

            _tokens = File.ReadAllLines(tokensFilePath);
            _onnxSession = new InferenceSession(modelFilePath, options);
            
            string[] tokenLines;
            if (tokensFilePath.EndsWith(".txt"))
            {
                tokenLines = File.ReadAllLines(tokensFilePath);
            }
            else if (tokensFilePath.EndsWith(".json"))
            {
                string jsonContent = File.ReadAllText(tokensFilePath);
                JArray tokenArray = JArray.Parse(jsonContent);
                tokenLines = tokenArray.Select(t => t.ToString()).ToArray();
            }
            else
            {
                throw new ArgumentException("Invalid tokens file format. Only .txt and .json are supported.");
            }

            _tokens = tokenLines;

            OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath);
            _wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf);
            _frontend = offlineYamlEntity.frontend;
            _frontendConfEntity = offlineYamlEntity.frontend_conf;
            _batchSize = batchSize;
            ILoggerFactory loggerFactory = new LoggerFactory();
            _logger = new Logger<OfflineRecognizer>(loggerFactory);
        }

        public List<string> GetResults(List<float[]> samples)
        {
            this._logger.LogInformation("get features begin");
            _logger.LogInformation("get features begin");
            List<OfflineInputEntity> offlineInputEntities = ExtractFeats(samples);
            OfflineOutputEntity modelOutput = this.Forward(offlineInputEntities);
            List<string> text_results = this.DecodeMulti(modelOutput.Token_nums);
            OfflineOutputEntity modelOutput = Forward(offlineInputEntities);
            List<string> text_results = DecodeMulti(modelOutput.Token_nums);
            return text_results;
        }

@@ -156,15 +202,18 @@
                    {
                        break;
                    }
                    if (_tokens[token] != "</s>" && _tokens[token] != "<s>" && _tokens[token] != "<blank>")

                    string tokenChar = _tokens[token];

                    if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>")
                    {                        
                        if (IsChinese(_tokens[token],true))
                        if (IsChinese(tokenChar, true))
                        {
                            text_result += _tokens[token];
                            text_result += tokenChar;
                        }
                        else
                        {
                            text_result += "▁" + _tokens[token]+ "▁";
                            text_result += "▁" + tokenChar + "▁";
                        }
                    }
                }

 runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/YamlHelper.cs

@@ -14,16 +14,19 @@
    /// YamlHelper
    /// Copyright (c)  2023 by manyeyes
    /// </summary>
    internal class YamlHelper
    internal class YamlHelper 
    {
        public static T ReadYaml<T>(string yamlFilePath)
        public static T ReadYaml<T>(string yamlFilePath) where T:new()
        {
            if (!File.Exists(yamlFilePath))
            {
#pragma warning disable CS8603 // 可能返回 null 引用。
                return default(T);
#pragma warning restore CS8603 // 可能返回 null 引用。
                // 如果允许返回默认对象，则新建一个默认对象，否则应该是抛出异常
                // If allowing to return a default object, create a new default object; otherwise, throw an exception

                return new T();
                // throw new Exception($"not find yaml config file: {yamlFilePath}");
            }

            StreamReader yamlReader = File.OpenText(yamlFilePath);
            Deserializer yamlDeserializer = new Deserializer();
            T info = yamlDeserializer.Deserialize<T>(yamlReader);

			@@ -8,6 +8,7 @@
			</PropertyGroup>

			<ItemGroup>
			<PackageReference Include="CommandLineParser" Version="2.9.1" />
			<PackageReference Include="NAudio" Version="2.1.0" />
			</ItemGroup>

			@@ -1,61 +1,107 @@
			using AliFsmnVadSharp;
			using AliFsmnVadSharp.Model;
			using CommandLine;
			using NAudio.Wave;

			internal static class Program
			{
			[STAThread]
			private static void Main()
			{
			string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
			string modelFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/model.onnx";
			string configFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.yaml";
			string mvnFilePath = applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/vad.mvn";
			int batchSize = 2;
			TimeSpan start_time0 = new TimeSpan(DateTime.Now.Ticks);
			AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);
			TimeSpan end_time0 = new TimeSpan(DateTime.Now.Ticks);
			double elapsed_milliseconds0 = end_time0.TotalMilliseconds - start_time0.TotalMilliseconds;
			Console.WriteLine("load model and init config elapsed_milliseconds:{0}", elapsed_milliseconds0.ToString());
			List<float[]> samples = new List<float[]>();
			TimeSpan total_duration = new TimeSpan(0L);
			for (int i = 0; i < 2; i++)
			{
			string wavFilePath = string.Format(applicationBase + "./speech_fsmn_vad_zh-cn-16k-common-pytorch/example/{0}.wav", i.ToString());//vad_example
			if (!File.Exists(wavFilePath))
			{
			continue;
			}
			AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
			byte[] datas = new byte[_audioFileReader.Length];
			_audioFileReader.Read(datas, 0, datas.Length);
			TimeSpan duration = _audioFileReader.TotalTime;
			float[] wavdata = new float[datas.Length / 4];
			Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
			float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
			samples.Add(wavdata);
			total_duration += duration;
			}
			TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
			//SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
			SegmentEntity[] segments_duration = aliFsmnVad.GetSegmentsByStep(samples);
			TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
			Console.WriteLine("vad infer result:");
			foreach (SegmentEntity segment in segments_duration)
			{
			Console.Write("[");
			foreach (var x in segment.Segment)
			{
			Console.Write("[" + string.Join(",", x.ToArray()) + "]");
			}
			Console.Write("]\r\n");
			}
			public class ProgramParams
			{
			[Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
			public string WavFilePath { get; set; }

			double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;
			[Option('m', "model", Default = "speech_fsmn_vad_zh-cn-16k-common-onnx", HelpText = "Model path.")]
			public string Model { get; set; }
			}

			/// <summary>
			/// Runs voice-activity detection on one wav file, or on every <c>*.wav</c> file in a folder,
			/// prints the detected segments and then reports elapsed time, audio duration and the
			/// real-time factor (elapsed time divided by audio duration).
			/// </summary>
			/// <param name="args">Command-line arguments, bound to <see cref="ProgramParams"/>.</param>
			/// <exception cref="DirectoryNotFoundException">
			/// The model folder exists neither as given nor under the application base directory.
			/// </exception>
			/// <exception cref="FileNotFoundException">
			/// The input path is neither an existing file nor an existing folder.
			/// </exception>
			[STAThread]
			private static void Main(string[] args)
			{
				// Value is null when the arguments could not be parsed (or only help/version was
				// requested). The parser reports that to the user itself, so just stop here instead
				// of failing with a NullReferenceException on the first property access.
				var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;
				if (argParams is null)
				{
					Environment.ExitCode = 1;
					return;
				}

				// Accept the model path as given first, then relative to the application base directory.
				string modelPath = argParams.Model;
				if (!Directory.Exists(modelPath))
				{
					modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
					if (!Directory.Exists(modelPath))
					{
						throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
					}
				}

				string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
				string configFilePath = Path.Combine(modelPath, "config.yaml");
				string mvnFilePath = Path.Combine(modelPath, "am.mvn");

				int batchSize = 1;

				// AliFsmnVad implements IDisposable; release it when Main exits.
				using AliFsmnVad aliFsmnVad = new AliFsmnVad(modelFilePath, configFilePath, mvnFilePath, batchSize);

				List<string> wavFiles = new List<string>();

				if (File.Exists(argParams.WavFilePath))
				{
					wavFiles.Add(argParams.WavFilePath);
				}
				else if (Directory.Exists(argParams.WavFilePath))
				{
					wavFiles.AddRange(Directory.GetFiles(argParams.WavFilePath, "*.wav"));
				}
				else
				{
					throw new FileNotFoundException($"Invalid wav input path. {argParams.WavFilePath}");
				}

				// Without any audio the real-time factor below would be a division by zero.
				if (wavFiles.Count == 0)
				{
					Console.Error.WriteLine($"No wav files found in: {argParams.WavFilePath}");
					return;
				}

				// Stopwatch is monotonic, unlike DateTime.Now which can jump with clock adjustments.
				var stopwatch = System.Diagnostics.Stopwatch.StartNew();

				TimeSpan total_duration = TimeSpan.Zero;
				for (int i = 0; i < wavFiles.Count; i += batchSize)
				{
					List<float[]> samples = new List<float[]>();

					foreach (var wavFile in wavFiles.Skip(i).Take(batchSize))
					{
						(var sample, var duration) = LoadWavFile(wavFile);
						samples.Add(sample);
						total_duration += duration;
					}

					SegmentEntity[] segments_duration = aliFsmnVad.GetSegments(samples);
					Console.WriteLine("vad infer result:");
					foreach (SegmentEntity segment in segments_duration)
					{
						Console.Write("[");
						foreach (var x in segment.Segment)
						{
							Console.Write("[" + string.Join(",", x.ToArray()) + "]");
						}
						Console.Write("]\r\n");
					}
				}

				stopwatch.Stop();

				double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;

				double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
				Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
				Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
				Console.WriteLine("rtf:{0}", rtf.ToString());
				Console.WriteLine("------------------------");
			}

			/// <summary>
			/// Reads an audio file completely into memory and returns its samples scaled by 32768
			/// together with the duration reported by the reader.
			/// </summary>
			/// <param name="wavFilePath">Path of the audio file to load.</param>
			/// <returns>
			/// <c>sample</c>: the decoded float samples, each multiplied by 32768;
			/// <c>duration</c>: the total play time of the file.
			/// </returns>
			private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
			{
				// Dispose the reader so the underlying file handle is released as soon as the
				// samples have been copied out.
				using AudioFileReader audioFileReader = new AudioFileReader(wavFilePath);
				TimeSpan duration = audioFileReader.TotalTime;

				byte[] datas = new byte[audioFileReader.Length];

				// A single Read call is not guaranteed to fill the buffer; keep reading until the
				// buffer is full or the reader reports end of stream.
				int totalRead = 0;
				while (totalRead < datas.Length)
				{
					int read = audioFileReader.Read(datas, totalRead, datas.Length - totalRead);
					if (read <= 0)
					{
						break;
					}
					totalRead += read;
				}

				// The bytes are reinterpreted as 32-bit floats, i.e. four bytes per sample. Only the
				// bytes actually read are kept, so a short read does not add trailing zero samples.
				float[] sample = new float[totalRead / sizeof(float)];
				Buffer.BlockCopy(datas, 0, sample, 0, sample.Length * sizeof(float));

				// Scale each sample by 32768, in place.
				for (int i = 0; i < sample.Length; i++)
				{
					sample[i] *= 32768f;
				}

				return (sample, duration);
			}
			}

			@@ -4,6 +4,8 @@
			using Microsoft.ML.OnnxRuntime;
			using Microsoft.ML.OnnxRuntime.Tensors;

			// 模型文件下载地址：https://modelscope.cn/models/iic/speech_fsmn_vad_zh-cn-16k-common-onnx/

			namespace AliFsmnVadSharp
			{
			public class AliFsmnVad : IDisposable
			@@ -28,9 +30,9 @@
			VadYamlEntity vadYamlEntity = YamlHelper.ReadYaml<VadYamlEntity>(configFilePath);
			_wavFrontend = new WavFrontend(mvnFilePath, vadYamlEntity.frontend_conf);
			_frontend = vadYamlEntity.frontend;
			_vad_post_conf = vadYamlEntity.vad_post_conf;
			_vad_post_conf = vadYamlEntity.model_conf;
			_batchSize = batchSize;
			_max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.vad_post_conf.max_end_silence_time;
			_max_end_sil = _max_end_sil != int.MinValue ? _max_end_sil : vadYamlEntity.model_conf.max_end_silence_time;
			_encoderConfEntity = vadYamlEntity.encoder_conf;

			ILoggerFactory loggerFactory = new LoggerFactory();

			@@ -22,6 +22,6 @@
			public string encoder { get => _encoder; set => _encoder = value; }
			public FrontendConfEntity frontend_conf { get => _frontend_conf; set => _frontend_conf = value; }
			public EncoderConfEntity encoder_conf { get => _encoder_conf; set => _encoder_conf = value; }
			public VadPostConfEntity vad_post_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
			public VadPostConfEntity model_conf { get => _vad_post_conf; set => _vad_post_conf = value; }
			}
			}

			@@ -5,6 +5,7 @@
			<TargetFramework>net6.0</TargetFramework>
			<ImplicitUsings>enable</ImplicitUsings>
			<Nullable>enable</Nullable>
			<Platforms>AnyCPU;x64</Platforms>
			</PropertyGroup>

			<ItemGroup>
			@@ -12,6 +13,7 @@
			</ItemGroup>

			<ItemGroup>
			<PackageReference Include="CommandLineParser" Version="2.9.1" />
			<PackageReference Include="NAudio" Version="2.1.0" />
			</ItemGroup>

			@@ -1,66 +1,116 @@
			using AliParaformerAsr;
			using CommandLine;
			using NAudio.Wave;

			internal static class Program
			{
			[STAThread]
			private static void Main()
			{
			string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
			string modelName = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx";
			string modelFilePath = applicationBase + "./"+ modelName + "/model_quant.onnx";
			string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
			string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
			string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
			AliParaformerAsr.OfflineRecognizer offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath);
			List<float[]>? samples = null;
			TimeSpan total_duration = new TimeSpan(0L);
			if (samples == null)

			/// <summary>
			/// Command-line options of the example program, bound by CommandLineParser
			/// through the <c>[Option]</c> attributes on each property.
			/// </summary>
			public class ProgramParams
			{
			    // Single source of truth for the default model directory name, so the
			    // attribute default and the property initializer can never disagree.
			    private const string DefaultModel = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx";

			    /// <summary>
			    /// Path of a single wav file, or of a folder containing wav files (-i / --input, required).
			    /// </summary>
			    // Initialized so the non-nullable property is never observed as null (CS8618).
			    [Option('i', "input", Required = true, HelpText = "Input wav file/folder path.")]
			    public string WavFilePath { get; set; } = string.Empty;

			    /// <summary>
			    /// Model directory path (-m / --model); defaults to the bundled model directory name.
			    /// </summary>
			    [Option('m', "model", Default = DefaultModel, HelpText = "Model path.")]
			    public string Model { get; set; } = DefaultModel;
			}

			[STAThread]
			private static void Main(string[] args)
			{
			var argParams = Parser.Default.ParseArguments<ProgramParams>(args).Value;

			string modelPath = argParams.Model;
			if (!Directory.Exists(argParams.Model))
			{
			samples = new List<float[]>();
			for (int i = 0; i < 5; i++)
			modelPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, modelPath);
			if (!Directory.Exists(modelPath))
			{
			string wavFilePath = string.Format(applicationBase + "./" + modelName + "/example/{0}.wav", i.ToString());
			if (!File.Exists(wavFilePath))
			{
			break;
			}
			AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath);
			byte[] datas = new byte[_audioFileReader.Length];
			_audioFileReader.Read(datas, 0, datas.Length);
			TimeSpan duration = _audioFileReader.TotalTime;
			float[] wavdata = new float[datas.Length / 4];
			Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length);
			float[] sample = wavdata.Select((float x) => x * 32768f).ToArray();
			samples.Add(sample);
			total_duration += duration;
			throw new DirectoryNotFoundException($"Model not found: {argParams.Model}");
			}
			}
			TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks);
			//1.Non batch method
			foreach (var sample in samples)

			string modelFilePath = Path.Combine(modelPath, "model_quant.onnx");
			string configFilePath = Path.Combine(modelPath, "asr.yaml");
			string mvnFilePath = Path.Combine(modelPath, "am.mvn");
			string tokensFilePath = Path.Combine(modelPath, "tokens.json");


			var offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath, OnnxRumtimeTypes.CPU);

			List<float[]> samples = new List<float[]>();
			TimeSpan total_duration = new TimeSpan(0L);

			if (File.Exists(argParams.WavFilePath))
			{
			List<float[]> temp_samples = new List<float[]>();
			temp_samples.Add(sample);
			(var sample, var duration) = LoadWavFile(argParams.WavFilePath);

			samples.Add(sample);
			total_duration += duration;
			}
			else if (Directory.Exists(argParams.WavFilePath))
			{
			var findWavCount = 0;

			foreach (var wavFilePath in Directory.EnumerateFiles(argParams.WavFilePath, "*.wav"))
			{
			(var sample, var duration) = LoadWavFile(wavFilePath);

			samples.Add(sample);
			total_duration += duration;
			findWavCount++;
			}

			Console.WriteLine($"Total WAV files found: {findWavCount} duration：{total_duration}");
			}
			else
			{
			throw new Exception($"Invalid wav input path. {argParams.WavFilePath}");
			}

			var start_time = DateTime.Now;

			int batchSize = 1; // 输入参数支持批处理，但是实际效果提升有限，感觉还是负优化，等GPU版本优化后再试
			for (int i = 0; i < samples.Count; i += batchSize)
			{
			List<float[]> temp_samples = samples.Skip(i).Take(batchSize).ToList();

			List<string> results = offlineRecognizer.GetResults(temp_samples);

			foreach (string result in results)
			{
			Console.WriteLine(result);
			Console.WriteLine("");
			}
			}
			//2.batch method
			//List<string> results_batch = offlineRecognizer.GetResults(samples);
			//foreach (string result in results_batch)
			//{
			// Console.WriteLine(result);
			// Console.WriteLine("");
			//}
			TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks);
			double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds;


			var end_time = DateTime.Now;

			double elapsed_milliseconds = (end_time - start_time).TotalMilliseconds;
			double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;

			Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
			Console.WriteLine("total_duration:{0}", total_duration.TotalMilliseconds.ToString());
			Console.WriteLine("rtf:{1}", "0".ToString(), rtf.ToString());

			// 实时因子是处理时间与音频时长的比值。
			// 例如，如果一个 10 秒的音频片段需要 5 秒来处理，那么实时因子就是 0.5。
			// 如果处理时间和音频时长相等，那么实时因子就是 1，这意味着系统以实时速度进行处理。
			// 数值越小，表示处理速度越快。
			// from chatgpt 解释
			Console.WriteLine("Real-Time Factor :{0}", rtf.ToString());
			Console.WriteLine("end!");
			}
			}

			/// <summary>
			/// Reads an audio file through <see cref="AudioFileReader"/> and returns its samples,
			/// each multiplied by 32768, together with the duration reported by the reader.
			/// </summary>
			/// <param name="wavFilePath">Path of the audio file to load.</param>
			/// <returns>
			/// <c>sample</c>: the decoded samples scaled by 32768;
			/// <c>duration</c>: the reader's <c>TotalTime</c>.
			/// </returns>
			private static (float[] sample, TimeSpan duration) LoadWavFile(string wavFilePath)
			{
			    // Dispose the reader when done so the underlying file handle is released;
			    // otherwise every loaded file stays open for the lifetime of the process.
			    using var audioFileReader = new AudioFileReader(wavFilePath);

			    byte[] datas = new byte[audioFileReader.Length];

			    // A single Read call is not guaranteed to fill the buffer, so keep
			    // reading until the buffer is full or the reader reports end of data.
			    int totalRead = 0;
			    int read;
			    while (totalRead < datas.Length
			        && (read = audioFileReader.Read(datas, totalRead, datas.Length - totalRead)) > 0)
			    {
			        totalRead += read;
			    }

			    var duration = audioFileReader.TotalTime;

			    // The byte buffer is reinterpreted as 4-byte floats; only whole samples
			    // that were actually read are kept.
			    float[] wavdata = new float[totalRead / sizeof(float)];
			    Buffer.BlockCopy(datas, 0, wavdata, 0, wavdata.Length * sizeof(float));

			    var sample = wavdata.Select((float x) => x * 32768f).ToArray();

			    return (sample, duration);
			}
			}

			@@ -9,7 +9,9 @@
			<ItemGroup>
			<PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" />
			<PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" />
			<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.15.1" />
			<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
			<PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" />
			<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
			<PackageReference Include="YamlDotNet" Version="13.1.1" />
			</ItemGroup>

			@@ -9,9 +9,20 @@
			using Microsoft.ML.OnnxRuntime.Tensors;
			using Microsoft.Extensions.Logging;
			using System.Text.RegularExpressions;
			using Newtonsoft.Json.Linq;

			// 模型文件地址： https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
			namespace AliParaformerAsr
			{
			/// <summary>
			/// Identifies which ONNX Runtime execution provider the recognizer appends
			/// to its session options.
			/// </summary>
			public enum OnnxRumtimeTypes
			{
			    /// <summary>CPU execution provider.</summary>
			    CPU = 0,
			    /// <summary>DML execution provider.</summary>
			    DML = 1,
			    /// <summary>CUDA execution provider.</summary>
			    CUDA = 2,
			}

			/// <summary>
			/// offline recognizer package
			/// Copyright (c) 2023 by manyeyes
			@@ -24,35 +35,70 @@
			private string _frontend;
			private FrontendConfEntity _frontendConfEntity;
			private string[] _tokens;
			private int _batchSize = 1;

			public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath,string tokensFilePath, int batchSize = 1,int threadsNum=1)
			/// <summary>
			///
			/// </summary>
			/// <param name="modelFilePath"></param>
			/// <param name="configFilePath"></param>
			/// <param name="mvnFilePath"></param>
			/// <param name="tokensFilePath"></param>
			/// <param name="rumtimeType">可以选择gpu，但是目前情况下，不建议使用，因为性能提升有限</param>
			/// <param name="deviceId">设备id，多显卡时用于指定执行的显卡</param>
			/// <param name="batchSize"></param>
			/// <param name="threadsNum"></param>
			/// <exception cref="ArgumentException"></exception>
			public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
			{
			Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
			var options = new SessionOptions();
			switch(rumtimeType)
			{
			case OnnxRumtimeTypes.DML:
			options.AppendExecutionProvider_DML(deviceId);
			break;
			case OnnxRumtimeTypes.CUDA:
			options.AppendExecutionProvider_CUDA(deviceId);
			break;
			default:
			options.AppendExecutionProvider_CPU(deviceId);
			break;
			}
			//options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
			//options.AppendExecutionProvider_DML(0);
			options.AppendExecutionProvider_CPU(0);
			//options.AppendExecutionProvider_CUDA(0);
			options.InterOpNumThreads = threadsNum;
			_onnxSession = new InferenceSession(modelFilePath, options);

			_tokens = File.ReadAllLines(tokensFilePath);
			_onnxSession = new InferenceSession(modelFilePath, options);

			string[] tokenLines;
			if (tokensFilePath.EndsWith(".txt"))
			{
			tokenLines = File.ReadAllLines(tokensFilePath);
			}
			else if (tokensFilePath.EndsWith(".json"))
			{
			string jsonContent = File.ReadAllText(tokensFilePath);
			JArray tokenArray = JArray.Parse(jsonContent);
			tokenLines = tokenArray.Select(t => t.ToString()).ToArray();
			}
			else
			{
			throw new ArgumentException("Invalid tokens file format. Only .txt and .json are supported.");
			}

			_tokens = tokenLines;

			OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath);
			_wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf);
			_frontend = offlineYamlEntity.frontend;
			_frontendConfEntity = offlineYamlEntity.frontend_conf;
			_batchSize = batchSize;
			ILoggerFactory loggerFactory = new LoggerFactory();
			_logger = new Logger<OfflineRecognizer>(loggerFactory);
			}

			public List<string> GetResults(List<float[]> samples)
			{
			this._logger.LogInformation("get features begin");
			_logger.LogInformation("get features begin");
			List<OfflineInputEntity> offlineInputEntities = ExtractFeats(samples);
			OfflineOutputEntity modelOutput = this.Forward(offlineInputEntities);
			List<string> text_results = this.DecodeMulti(modelOutput.Token_nums);
			OfflineOutputEntity modelOutput = Forward(offlineInputEntities);
			List<string> text_results = DecodeMulti(modelOutput.Token_nums);
			return text_results;
			}

			@@ -156,15 +202,18 @@
			{
			break;
			}
			if (_tokens[token] != "</s>" && _tokens[token] != "<s>" && _tokens[token] != "<blank>")

			string tokenChar = _tokens[token];

			if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>")
			{
			if (IsChinese(_tokens[token],true))
			if (IsChinese(tokenChar, true))
			{
			text_result += _tokens[token];
			text_result += tokenChar;
			}
			else
			{
			text_result += "▁" + _tokens[token]+ "▁";
			text_result += "▁" + tokenChar + "▁";
			}
			}
			}

			@@ -14,16 +14,19 @@
			/// YamlHelper
			/// Copyright (c) 2023 by manyeyes
			/// </summary>
			internal class YamlHelper
			internal class YamlHelper
			{
			public static T ReadYaml<T>(string yamlFilePath)
			public static T ReadYaml<T>(string yamlFilePath) where T:new()
			{
			if (!File.Exists(yamlFilePath))
			{
			#pragma warning disable CS8603 // 可能返回 null 引用。
			return default(T);
			#pragma warning restore CS8603 // 可能返回 null 引用。
			// 如果允许返回默认对象，则新建一个默认对象，否则应该是抛出异常
			// If allowing to return a default object, create a new default object; otherwise, throw an exception

			return new T();
			// throw new Exception($"not find yaml config file: {yamlFilePath}");
			}

			StreamReader yamlReader = File.OpenText(yamlFilePath);
			Deserializer yamlDeserializer = new Deserializer();
			T info = yamlDeserializer.Deserialize<T>(yamlReader);