manyeyes
2024-07-23 a5a06217c17dc69aca53d1e247b7a6671d35373c
Add support for SenseVoiceSmall onnx model in c# lib (#1946)

* Add support for SenseVoiceSmall onnx model

* parameter modification
6个文件已修改
6个文件已添加
685 ■■■■ 已修改文件
runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs 43 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs 10 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs 12 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs 17 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs 72 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs 113 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs 156 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs 187 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs 55 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/README.md 9 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj
@@ -1,16 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <TargetFramework>net8.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>
  <ItemGroup>
    <PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" />
    <PackageReference Include="KaldiNativeFbankSharp" Version="1.1.2" />
    <PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.18.1" />
    <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.18.1" />
    <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
    <PackageReference Include="YamlDotNet" Version="13.1.1" />
  </ItemGroup>
runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs
New file
@@ -0,0 +1,43 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
namespace AliParaformerAsr
{
    internal interface IOfflineProj
    {
        InferenceSession ModelSession
        {
            get;
            set;
        }
        int Blank_id
        {
            get;
            set;
        }
        int Sos_eos_id
        {
            get;
            set;
        }
        int Unk_id
        {
            get;
            set;
        }
        int SampleRate
        {
            get;
            set;
        }
        int FeatureDim
        {
            get;
            set;
        }
        internal ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs);
        internal void Dispose();
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs
@@ -1,11 +1,5 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2023 by manyeyes
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AliParaformerAsr.Model
{
    public class FrontendConfEntity
@@ -15,9 +9,10 @@
        private int _n_mels = 80;
        private int _frame_length = 25;
        private int _frame_shift = 10;
        private float _dither = 0.0F;
        private float _dither = 1.0F;
        private int _lfr_m = 7;
        private int _lfr_n = 6;
        private bool _snip_edges = false;
        public int fs { get => _fs; set => _fs = value; }
        public string window { get => _window; set => _window = value; }
@@ -27,5 +22,6 @@
        public float dither { get => _dither; set => _dither = value; }
        public int lfr_m { get => _lfr_m; set => _lfr_m = value; }
        public int lfr_n { get => _lfr_n; set => _lfr_n = value; }
        public bool snip_edges { get => _snip_edges; set => _snip_edges = value; }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs
@@ -1,11 +1,5 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2023 by manyeyes
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AliParaformerAsr.Model
{
    public class ModelConfEntity
@@ -16,6 +10,9 @@
        private float _predictor_weight = 1.0F;
        private int _predictor_bias = 1;
        private float _sampling_ratio = 0.75F;
        private int _sos = 1;
        private int _eos = 2;
        private int _ignore_id = -1;
        public float ctc_weight { get => _ctc_weight; set => _ctc_weight = value; }
        public float lsm_weight { get => _lsm_weight; set => _lsm_weight = value; }
@@ -23,5 +20,8 @@
        public float predictor_weight { get => _predictor_weight; set => _predictor_weight = value; }
        public int predictor_bias { get => _predictor_bias; set => _predictor_bias = value; }
        public float sampling_ratio { get => _sampling_ratio; set => _sampling_ratio = value; }
        public int sos { get => _sos; set => _sos = value; }
        public int eos { get => _eos; set => _eos = value; }
        public int ignore_id { get => _ignore_id; set => _ignore_id = value; }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs
New file
@@ -0,0 +1,17 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2024 by manyeyes
using Microsoft.ML.OnnxRuntime.Tensors;
namespace AliParaformerAsr.Model
{
    internal class ModelOutputEntity
    {
        private Tensor<float>? _model_out;
        private int[]? _model_out_lens;
        private Tensor<float>? _cif_peak_tensor;
        public Tensor<float>? model_out { get => _model_out; set => _model_out = value; }
        public int[]? model_out_lens { get => _model_out_lens; set => _model_out_lens = value; }
        public Tensor<float>? cif_peak_tensor { get => _cif_peak_tensor; set => _cif_peak_tensor = value; }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs
New file
@@ -0,0 +1,72 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2024 by manyeyes
using Microsoft.ML.OnnxRuntime;
namespace AliParaformerAsr
{
    public enum OnnxRumtimeTypes
    {
        CPU = 0,
        DML = 1,
        CUDA = 2,
    }
    public class OfflineModel
    {
        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        public OfflineModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            _modelSession = initModel(modelFilePath, threadsNum, rumtimeType, deviceId);
        }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => sos_eos_id; set => sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
        public InferenceSession initModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            var options = new SessionOptions();
            switch (rumtimeType)
            {
                case OnnxRumtimeTypes.DML:
                    options.AppendExecutionProvider_DML(deviceId);
                    break;
                case OnnxRumtimeTypes.CUDA:
                    options.AppendExecutionProvider_CUDA(deviceId);
                    break;
                default:
                    options.AppendExecutionProvider_CPU(deviceId);
                    break;
            }
            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.InterOpNumThreads = threadsNum;
            InferenceSession onnxSession = new InferenceSession(modelFilePath, options);
            return onnxSession;
        }
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (_modelSession != null)
                {
                    _modelSession.Dispose();
                }
            }
        }
        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs
New file
@@ -0,0 +1,113 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using AliParaformerAsr.Utils;
namespace AliParaformerAsr
{
    internal class OfflineProjOfParaformer : IOfflineProj, IDisposable
    {
        // To detect redundant calls
        private bool _disposed;
        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        public OfflineProjOfParaformer(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            var inputMeta = _modelSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { batchSize };
                    int[] speech_lengths = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        speech_lengths[i] = padSequence.Length / 560 / batchSize;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }
            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    if (resultsArray.Length >= 4)
                    {
                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
                    }
                }
            }
            catch (Exception ex)
            {
                //
            }
            return modelOutputEntity;
        }
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }
        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
        ~OfflineProjOfParaformer()
        {
            Dispose(_disposed);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs
New file
@@ -0,0 +1,156 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using AliParaformerAsr.Utils;
namespace AliParaformerAsr
{
    internal class OfflineProjOfSenseVoiceSmall : IOfflineProj, IDisposable
    {
        // To detect redundant calls
        private bool _disposed;
        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        private bool _use_itn = false;
        private string _textnorm = "woitn";
        private Dictionary<string, int> _lidDict = new Dictionary<string, int>() { { "auto", 0 }, { "zh", 3 }, { "en", 4 }, { "yue", 7 }, { "ja", 11 }, { "ko", 12 }, { "nospeech", 13 } };
        private Dictionary<int, int> _lidIntDict = new Dictionary<int, int>() { { 24884, 3 }, { 24885, 4 }, { 24888, 7 }, { 24892, 11 }, { 24896, 12 }, { 24992, 13 } };
        private Dictionary<string, int> _textnormDict = new Dictionary<string, int>() { { "withitn", 14 }, { "woitn", 15 } };
        private Dictionary<int, int> _textnormIntDict = new Dictionary<int, int>() { { 25016, 14 }, { 25017, 15 } };
        public OfflineProjOfSenseVoiceSmall(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            //
            string languageValue = "ja";
            int languageId = 0;
            if (_lidDict.ContainsKey(languageValue))
            {
                languageId = _lidDict.GetValueOrDefault(languageValue);
            }
            string textnormValue = "withitn";
            int textnormId = 15;
            if (_textnormDict.ContainsKey(textnormValue))
            {
                textnormId = _textnormDict.GetValueOrDefault(textnormValue);
            }
            var inputMeta = _modelSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { batchSize };
                    int[] speech_lengths = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        speech_lengths[i] = padSequence.Length / 560 / batchSize;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
                if (name == "language")
                {
                    int[] language = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        language[i] = languageId;
                    }
                    int[] dim = new int[] { batchSize };
                    var tensor = new DenseTensor<int>(language, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
                if (name == "textnorm")
                {
                    int[] textnorm = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        textnorm[i] = textnormId;
                    }
                    int[] dim = new int[] { batchSize };
                    var tensor = new DenseTensor<int>(textnorm, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }
            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    if (resultsArray.Length >= 4)
                    {
                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
                    }
                }
            }
            catch (Exception ex)
            {
                //
            }
            return modelOutputEntity;
        }
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }
        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
        ~OfflineProjOfSenseVoiceSmall()
        {
            Dispose(_disposed);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs
@@ -1,28 +1,17 @@
// See https://github.com/manyeyes for more information
// Copyright (c)  2023 by manyeyes
using System.Linq;
using System.Text;
using System.Threading.Tasks;
// Copyright (c)  2024 by manyeyes
using AliParaformerAsr.Model;
using AliParaformerAsr.Utils;
using Microsoft.Extensions.Logging;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.Extensions.Logging;
using System.Text.RegularExpressions;
using Newtonsoft.Json.Linq;
using System.Text.RegularExpressions;
// 模型文件地址: https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
// 模型文件地址: https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx
namespace AliParaformerAsr
{
    public enum OnnxRumtimeTypes
    {
        CPU = 0,
        DML = 1,
        CUDA = 2,
    }
    /// <summary>
    /// offline recognizer package
    /// Copyright (c)  2023 by manyeyes
@@ -35,6 +24,8 @@
        private string _frontend;
        private FrontendConfEntity _frontendConfEntity;
        private string[] _tokens;
        private IOfflineProj? _offlineProj;
        private OfflineModel _offlineModel;
        /// <summary>
        /// 
@@ -48,24 +39,9 @@
        /// <param name="batchSize"></param>
        /// <param name="threadsNum"></param>
        /// <exception cref="ArgumentException"></exception>
        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, int threadsNum = 1, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            var options = new SessionOptions();
            switch(rumtimeType)
            {
                case OnnxRumtimeTypes.DML:
                    options.AppendExecutionProvider_DML(deviceId);
                    break;
                case OnnxRumtimeTypes.CUDA:
                    options.AppendExecutionProvider_CUDA(deviceId);
                    break;
                default:
                    options.AppendExecutionProvider_CPU(deviceId);
                    break;
            }
            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            _onnxSession = new InferenceSession(modelFilePath, options);
            _offlineModel = new OfflineModel(modelFilePath, threadsNum);
            
            string[] tokenLines;
            if (tokensFilePath.EndsWith(".txt"))
@@ -86,6 +62,18 @@
            _tokens = tokenLines;
            OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath);
            switch (offlineYamlEntity.model.ToLower())
            {
                case "paraformer":
                    _offlineProj = new OfflineProjOfParaformer(_offlineModel);
                    break;
                case "sensevoicesmall":
                    _offlineProj = new OfflineProjOfSenseVoiceSmall(_offlineModel);
                    break;
                default:
                    _offlineProj = null;
                    break;
            }
            _wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf);
            _frontend = offlineYamlEntity.frontend;
            _frontendConfEntity = offlineYamlEntity.frontend_conf;
@@ -120,73 +108,40 @@
        private OfflineOutputEntity Forward(List<OfflineInputEntity> modelInputs)
        {
            int BatchSize = modelInputs.Count;
            float[] padSequence = PadSequence(modelInputs);
            var inputMeta = _onnxSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { BatchSize, padSequence.Length / 560 / BatchSize, 560 };//inputMeta["speech"].Dimensions[2]
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { BatchSize };
                    int[] speech_lengths = new int[BatchSize];
                    for (int i = 0; i < BatchSize; i++)
                    {
                        speech_lengths[i] = padSequence.Length / 560 / BatchSize;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }
            IReadOnlyCollection<string> outputNames = new List<string>();
            outputNames.Append("logits");
            outputNames.Append("token_num");
            IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = null;
            OfflineOutputEntity offlineOutputEntity = new OfflineOutputEntity();
            try
            {
                results = _onnxSession.Run(container);
                ModelOutputEntity modelOutputEntity = _offlineProj.ModelProj(modelInputs);
                if (modelOutputEntity != null)
                {
                    offlineOutputEntity.Token_nums_length = modelOutputEntity.model_out_lens.AsEnumerable<int>().ToArray();
                    Tensor<float> logits_tensor = modelOutputEntity.model_out;
                    List<int[]> token_nums = new List<int[]> { };
                    for (int i = 0; i < logits_tensor.Dimensions[0]; i++)
                    {
                        int[] item = new int[logits_tensor.Dimensions[1]];
                        for (int j = 0; j < logits_tensor.Dimensions[1]; j++)
                        {
                            int token_num = 0;
                            for (int k = 1; k < logits_tensor.Dimensions[2]; k++)
                            {
                                token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? token_num : k;
                            }
                            item[j] = (int)token_num;
                        }
                        token_nums.Add(item);
                    }
                    offlineOutputEntity.Token_nums = token_nums;
                }
            }
            catch (Exception ex)
            {
                //
            }
            OfflineOutputEntity modelOutput = new OfflineOutputEntity();
            if (results != null)
            {
                var resultsArray = results.ToArray();
                modelOutput.Logits = resultsArray[0].AsEnumerable<float>().ToArray();
                modelOutput.Token_nums_length = resultsArray[1].AsEnumerable<int>().ToArray();
                Tensor<float> logits_tensor = resultsArray[0].AsTensor<float>();
                Tensor<Int64> token_nums_tensor = resultsArray[1].AsTensor<Int64>();
                List<int[]> token_nums = new List<int[]> { };
                for (int i = 0; i < logits_tensor.Dimensions[0]; i++)
                {
                    int[] item = new int[logits_tensor.Dimensions[1]];
                    for (int j = 0; j < logits_tensor.Dimensions[1]; j++)
                    {
                        int token_num = 0;
                        for (int k = 1; k < logits_tensor.Dimensions[2]; k++)
                        {
                            token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? token_num : k;
                        }
                        item[j] = (int)token_num;
                    }
                    token_nums.Add(item);
                }
                modelOutput.Token_nums = token_nums;
            }
            return modelOutput;
            return offlineOutputEntity;
        }
        private List<string> DecodeMulti(List<int[]> token_nums)
@@ -203,9 +158,9 @@
                        break;
                    }
                    string tokenChar = _tokens[token];
                    string tokenChar = _tokens[token].Split("\t")[0];
                    if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>")
                    if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>" && tokenChar != "<unk>")
                    {                        
                        if (IsChinese(tokenChar, true))
                        {
@@ -244,48 +199,6 @@
            else
                return false;
        }
        private float[] PadSequence(List<OfflineInputEntity> modelInputs)
        {
            int max_speech_length = modelInputs.Max(x => x.SpeechLength);
            int speech_length = max_speech_length * modelInputs.Count;
            float[] speech = new float[speech_length];
            float[,] xxx = new float[modelInputs.Count, max_speech_length];
            for (int i = 0; i < modelInputs.Count; i++)
            {
                if (max_speech_length == modelInputs[i].SpeechLength)
                {
                    for (int j = 0; j < xxx.GetLength(1); j++)
                    {
#pragma warning disable CS8602 // 解引用可能出现空引用。
                        xxx[i, j] = modelInputs[i].Speech[j];
#pragma warning restore CS8602 // 解引用可能出现空引用。
                    }
                    continue;
                }
                float[] nullspeech = new float[max_speech_length - modelInputs[i].SpeechLength];
                float[]? curr_speech = modelInputs[i].Speech;
                float[] padspeech = new float[max_speech_length];
                padspeech = _wavFrontend.ApplyCmvn(padspeech);
                Array.Copy(curr_speech, 0, padspeech, 0, curr_speech.Length);
                for (int j = 0; j < padspeech.Length; j++)
                {
#pragma warning disable CS8602 // 解引用可能出现空引用。
                    xxx[i, j] = padspeech[j];
#pragma warning restore CS8602 // 解引用可能出现空引用。
                }
            }
            int s = 0;
            for (int i = 0; i < xxx.GetLength(0); i++)
            {
                for (int j = 0; j < xxx.GetLength(1); j++)
                {
                    speech[s] = xxx[i, j];
                    s++;
                }
            }
            return speech;
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs
New file
@@ -0,0 +1,55 @@
using AliParaformerAsr.Model;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace AliParaformerAsr.Utils
{
    internal static class PadHelper
    {
        public static float[] PadSequence(List<OfflineInputEntity> modelInputs)
        {
            int max_speech_length = modelInputs.Max(x => x.SpeechLength);
            int speech_length = max_speech_length * modelInputs.Count;
            float[] speech = new float[speech_length];
            float[,] xxx = new float[modelInputs.Count, max_speech_length];
            for (int i = 0; i < modelInputs.Count; i++)
            {
                if (max_speech_length == modelInputs[i].SpeechLength)
                {
                    for (int j = 0; j < xxx.GetLength(1); j++)
                    {
#pragma warning disable CS8602 // 解引用可能出现空引用。
                        xxx[i, j] = modelInputs[i].Speech[j];
#pragma warning restore CS8602 // 解引用可能出现空引用。
                    }
                    continue;
                }
                float[] nullspeech = new float[max_speech_length - modelInputs[i].SpeechLength];
                float[]? curr_speech = modelInputs[i].Speech;
                float[] padspeech = new float[max_speech_length];
                Array.Copy(curr_speech, 0, padspeech, 0, curr_speech.Length);
                for (int j = 0; j < padspeech.Length; j++)
                {
#pragma warning disable CS8602 // 解引用可能出现空引用。
                    xxx[i, j] = padspeech[j];
#pragma warning restore CS8602 // 解引用可能出现空引用。
                }
            }
            int s = 0;
            for (int i = 0; i < xxx.GetLength(0); i++)
            {
                for (int j = 0; j < xxx.GetLength(1); j++)
                {
                    speech[s] = xxx[i, j];
                    s++;
                }
            }
            speech = speech.Select(x => x == 0 ? -23.025850929940457F * 32768 : x).ToArray();
            return speech;
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs
@@ -30,7 +30,8 @@
            _fbank_beg_idx = 0;
            _onlineFbank = new OnlineFbank(
                dither: _frontendConfEntity.dither,
                snip_edges: false,
                snip_edges: _frontendConfEntity.snip_edges,
                window_type: _frontendConfEntity.window,
                sample_rate: _frontendConfEntity.fs,
                num_bins: _frontendConfEntity.n_mels
                );
runtime/csharp/AliParaformerAsr/README.md
@@ -1,4 +1,10 @@
# AliParaformerAsr
##### 支持模型
## paraformer-large offline onnx模型下载
https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
## SenseVoiceSmall offline onnx模型下载
https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx
##### 简介:
项目中使用的Asr模型是阿里巴巴达摩院提供的Paraformer-large ASR模型。
**项目基于 .NET 8.0,使用C#编写,调用Microsoft.ML.OnnxRuntime对onnx模型进行解码,支持跨平台编译。项目以库的形式进行调用,部署非常方便。**
@@ -25,9 +31,6 @@
##### ASR常用参数(参考:asr.yaml文件):
用于解码的asr.yaml配置参数,取自官方模型配置config.yaml原文件。便于跟进和升级。
## paraformer-large offline onnx模型下载
https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
## 离线(非流式)模型调用方法: