From a5a06217c17dc69aca53d1e247b7a6671d35373c Mon Sep 17 00:00:00 2001
From: manyeyes <32889020+manyeyes@users.noreply.github.com>
Date: Tue, 23 Jul 2024 14:41:00 +0800
Subject: [PATCH] Add support for SenseVoiceSmall onnx model in c# lib (#1946)

---
 runtime/csharp/AliParaformerAsr/README.md                                        |    9 
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs                 |   72 ++++++
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs              |   55 ++++
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs      |   17 +
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs            |  187 ++++-----------
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs     |   10 
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs      |  113 +++++++++
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs |  156 +++++++++++++
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs                  |    3 
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj         |    8 
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs                 |   43 +++
 runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs        |   12 
 12 files changed, 527 insertions(+), 158 deletions(-)

diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj
index b3fe558..de0416f 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj
@@ -1,16 +1,16 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
-    <TargetFramework>net6.0</TargetFramework>
+    <TargetFramework>net8.0</TargetFramework>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" />
+    <PackageReference Include="KaldiNativeFbankSharp" Version="1.1.2" />
     <PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" />
-    <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" />
+    <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.18.1" />
+    <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.18.1" />
     <PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
     <PackageReference Include="YamlDotNet" Version="13.1.1" />
   </ItemGroup>
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs
new file mode 100644
index 0000000..5cf2544
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs
@@ -0,0 +1,43 @@
+锘�// See https://github.com/manyeyes for more information
+// Copyright (c)  2024 by manyeyes
+using AliParaformerAsr.Model;
+using Microsoft.ML.OnnxRuntime;
+
+namespace AliParaformerAsr
+{
+    internal interface IOfflineProj
+    {
+        InferenceSession ModelSession 
+        {
+            get;
+            set;
+        }
+        int Blank_id
+        {
+            get;
+            set;
+        }
+        int Sos_eos_id
+        {
+            get;
+            set;
+        }
+        int Unk_id
+        {
+            get;
+            set;
+        }
+        int SampleRate
+        {
+            get;
+            set;
+        }
+        int FeatureDim
+        {
+            get;
+            set;
+        }
+        internal ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs);
+        internal void Dispose();
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs
index 4b0ca8e..d8c0021 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs
@@ -1,11 +1,5 @@
 锘�// See https://github.com/manyeyes for more information
 // Copyright (c)  2023 by manyeyes
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
 namespace AliParaformerAsr.Model
 {
     public class FrontendConfEntity
@@ -15,9 +9,10 @@
         private int _n_mels = 80;
         private int _frame_length = 25;
         private int _frame_shift = 10;
-        private float _dither = 0.0F;
+        private float _dither = 1.0F;
         private int _lfr_m = 7;
         private int _lfr_n = 6;
+        private bool _snip_edges = false;
 
         public int fs { get => _fs; set => _fs = value; }
         public string window { get => _window; set => _window = value; }
@@ -27,5 +22,6 @@
         public float dither { get => _dither; set => _dither = value; }
         public int lfr_m { get => _lfr_m; set => _lfr_m = value; }
         public int lfr_n { get => _lfr_n; set => _lfr_n = value; }
+        public bool snip_edges { get => _snip_edges; set => _snip_edges = value; }
     }
 }
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs
index 15af179..844f02e 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs
@@ -1,11 +1,5 @@
 锘�// See https://github.com/manyeyes for more information
 // Copyright (c)  2023 by manyeyes
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
-
 namespace AliParaformerAsr.Model
 {
     public class ModelConfEntity
@@ -16,6 +10,9 @@
         private float _predictor_weight = 1.0F;
         private int _predictor_bias = 1;
         private float _sampling_ratio = 0.75F;
+        private int _sos = 1;
+        private int _eos = 2;
+        private int _ignore_id = -1;
 
         public float ctc_weight { get => _ctc_weight; set => _ctc_weight = value; }
         public float lsm_weight { get => _lsm_weight; set => _lsm_weight = value; }
@@ -23,5 +20,8 @@
         public float predictor_weight { get => _predictor_weight; set => _predictor_weight = value; }
         public int predictor_bias { get => _predictor_bias; set => _predictor_bias = value; }
         public float sampling_ratio { get => _sampling_ratio; set => _sampling_ratio = value; }
+        public int sos { get => _sos; set => _sos = value; }
+        public int eos { get => _eos; set => _eos = value; }
+        public int ignore_id { get => _ignore_id; set => _ignore_id = value; }
     }
 }
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs
new file mode 100644
index 0000000..6ee5de0
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs
@@ -0,0 +1,17 @@
+锘�// See https://github.com/manyeyes for more information
+// Copyright (c)  2024 by manyeyes
+using Microsoft.ML.OnnxRuntime.Tensors;
+
+namespace AliParaformerAsr.Model
+{
+    internal class ModelOutputEntity
+    {
+        private Tensor<float>? _model_out;
+        private int[]? _model_out_lens;
+        private Tensor<float>? _cif_peak_tensor;
+
+        public Tensor<float>? model_out { get => _model_out; set => _model_out = value; }
+        public int[]? model_out_lens { get => _model_out_lens; set => _model_out_lens = value; }
+        public Tensor<float>? cif_peak_tensor { get => _cif_peak_tensor; set => _cif_peak_tensor = value; }
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs
new file mode 100644
index 0000000..28901ca
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs
@@ -0,0 +1,72 @@
+锘�// See https://github.com/manyeyes for more information
+// Copyright (c)  2024 by manyeyes
+using Microsoft.ML.OnnxRuntime;
+
+namespace AliParaformerAsr
+{
+    public enum OnnxRumtimeTypes
+    {
+        CPU = 0,
+
+        DML = 1,
+
+        CUDA = 2,
+    }
+    public class OfflineModel
+    {
+        private InferenceSession _modelSession;
+        private int _blank_id = 0;
+        private int sos_eos_id = 1;
+        private int _unk_id = 2;
+        private int _featureDim = 80;
+        private int _sampleRate = 16000;
+
+        public OfflineModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
+        {
+            _modelSession = initModel(modelFilePath, threadsNum, rumtimeType, deviceId);
+        }
+        public int Blank_id { get => _blank_id; set => _blank_id = value; }
+        public int Sos_eos_id { get => sos_eos_id; set => sos_eos_id = value; }
+        public int Unk_id { get => _unk_id; set => _unk_id = value; }
+        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
+        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
+        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
+
+        public InferenceSession initModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
+        {
+            var options = new SessionOptions();
+            switch (rumtimeType)
+            {
+                case OnnxRumtimeTypes.DML:
+                    options.AppendExecutionProvider_DML(deviceId);
+                    break;
+                case OnnxRumtimeTypes.CUDA:
+                    options.AppendExecutionProvider_CUDA(deviceId);
+                    break;
+                default:
+                    options.AppendExecutionProvider_CPU(deviceId);
+                    break;
+            }
+            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
+            options.InterOpNumThreads = threadsNum;
+            InferenceSession onnxSession = new InferenceSession(modelFilePath, options);
+            return onnxSession;
+        }
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                if (_modelSession != null)
+                {
+                    _modelSession.Dispose();
+                }
+            }
+        }
+
+        internal void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs
new file mode 100644
index 0000000..82d75ef
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs
@@ -0,0 +1,113 @@
+锘�// See https://github.com/manyeyes for more information
+// Copyright (c)  2024 by manyeyes
+using AliParaformerAsr.Model;
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using AliParaformerAsr.Utils;
+
+namespace AliParaformerAsr
+{
+    internal class OfflineProjOfParaformer : IOfflineProj, IDisposable
+    {
+        // To detect redundant calls
+        private bool _disposed;
+
+        private InferenceSession _modelSession;
+        private int _blank_id = 0;
+        private int _sos_eos_id = 1;
+        private int _unk_id = 2;
+
+        private int _featureDim = 80;
+        private int _sampleRate = 16000;
+
+        public OfflineProjOfParaformer(OfflineModel offlineModel)
+        {
+            _modelSession = offlineModel.ModelSession;
+            _blank_id = offlineModel.Blank_id;
+            _sos_eos_id = offlineModel.Sos_eos_id;
+            _unk_id = offlineModel.Unk_id;
+            _featureDim = offlineModel.FeatureDim;
+            _sampleRate = offlineModel.SampleRate;
+        }
+        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
+        public int Blank_id { get => _blank_id; set => _blank_id = value; }
+        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
+        public int Unk_id { get => _unk_id; set => _unk_id = value; }
+        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
+        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
+
+        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
+        {
+            int batchSize = modelInputs.Count;
+            float[] padSequence = PadHelper.PadSequence(modelInputs);
+            var inputMeta = _modelSession.InputMetadata;
+            var container = new List<NamedOnnxValue>();
+            foreach (var name in inputMeta.Keys)
+            {
+                if (name == "speech")
+                {
+                    int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 };
+                    var tensor = new DenseTensor<float>(padSequence, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
+                }
+                if (name == "speech_lengths")
+                {
+                    int[] dim = new int[] { batchSize };
+                    int[] speech_lengths = new int[batchSize];
+                    for (int i = 0; i < batchSize; i++)
+                    {
+                        speech_lengths[i] = padSequence.Length / 560 / batchSize;
+                    }
+                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
+                }
+            }
+            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
+            try
+            {
+                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
+
+                if (results != null)
+                {
+                    var resultsArray = results.ToArray();
+                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
+                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
+                    if (resultsArray.Length >= 4)
+                    {
+                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
+                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                //
+            }
+            return modelOutputEntity;
+        }
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!_disposed)
+            {
+                if (disposing)
+                {
+                    if (_modelSession != null)
+                    {
+                        _modelSession.Dispose();
+                    }
+                }
+                _disposed = true;
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+        ~OfflineProjOfParaformer()
+        {
+            Dispose(_disposed);
+        }
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs
new file mode 100644
index 0000000..705e8ef
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs
@@ -0,0 +1,156 @@
+锘�// See https://github.com/manyeyes for more information
+// Copyright (c)  2024 by manyeyes
+using AliParaformerAsr.Model;
+using Microsoft.ML.OnnxRuntime;
+using Microsoft.ML.OnnxRuntime.Tensors;
+using AliParaformerAsr.Utils;
+
+namespace AliParaformerAsr
+{
+    internal class OfflineProjOfSenseVoiceSmall : IOfflineProj, IDisposable
+    {
+        // To detect redundant calls
+        private bool _disposed;
+
+        private InferenceSession _modelSession;
+        private int _blank_id = 0;
+        private int _sos_eos_id = 1;
+        private int _unk_id = 2;
+
+        private int _featureDim = 80;
+        private int _sampleRate = 16000;
+
+        private bool _use_itn = false;
+        private string _textnorm = "woitn";
+        private Dictionary<string, int> _lidDict = new Dictionary<string, int>() { { "auto", 0 }, { "zh", 3 }, { "en", 4 }, { "yue", 7 }, { "ja", 11 }, { "ko", 12 }, { "nospeech", 13 } };
+        private Dictionary<int, int> _lidIntDict = new Dictionary<int, int>() { { 24884, 3 }, { 24885, 4 }, { 24888, 7 }, { 24892, 11 }, { 24896, 12 }, { 24992, 13 } };
+        private Dictionary<string, int> _textnormDict = new Dictionary<string, int>() { { "withitn", 14 }, { "woitn", 15 } };
+        private Dictionary<int, int> _textnormIntDict = new Dictionary<int, int>() { { 25016, 14 }, { 25017, 15 } };
+
+        public OfflineProjOfSenseVoiceSmall(OfflineModel offlineModel)
+        {
+            _modelSession = offlineModel.ModelSession;
+            _blank_id = offlineModel.Blank_id;
+            _sos_eos_id = offlineModel.Sos_eos_id;
+            _unk_id = offlineModel.Unk_id;
+            _featureDim = offlineModel.FeatureDim;
+            _sampleRate = offlineModel.SampleRate;
+        }
+        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
+        public int Blank_id { get => _blank_id; set => _blank_id = value; }
+        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
+        public int Unk_id { get => _unk_id; set => _unk_id = value; }
+        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
+        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
+
+        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
+        {
+            int batchSize = modelInputs.Count;
+            float[] padSequence = PadHelper.PadSequence(modelInputs);
+            //
+            string languageValue = "ja";
+            int languageId = 0;
+            if (_lidDict.ContainsKey(languageValue))
+            {
+                languageId = _lidDict.GetValueOrDefault(languageValue);
+            }
+            string textnormValue = "withitn";
+            int textnormId = 15;
+            if (_textnormDict.ContainsKey(textnormValue))
+            {
+                textnormId = _textnormDict.GetValueOrDefault(textnormValue);
+            }
+            var inputMeta = _modelSession.InputMetadata;
+            var container = new List<NamedOnnxValue>();
+            foreach (var name in inputMeta.Keys)
+            {
+                if (name == "speech")
+                {
+                    int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 };
+                    var tensor = new DenseTensor<float>(padSequence, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
+                }
+                if (name == "speech_lengths")
+                {
+
+                    int[] dim = new int[] { batchSize };
+                    int[] speech_lengths = new int[batchSize];
+                    for (int i = 0; i < batchSize; i++)
+                    {
+                        speech_lengths[i] = padSequence.Length / 560 / batchSize;
+                    }
+                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
+                }
+                if (name == "language")
+                {
+                    int[] language = new int[batchSize];
+                    for (int i = 0; i < batchSize; i++)
+                    {
+                        language[i] = languageId;
+                    }
+                    int[] dim = new int[] { batchSize };
+                    var tensor = new DenseTensor<int>(language, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
+                }
+                if (name == "textnorm")
+                {
+                    int[] textnorm = new int[batchSize];
+                    for (int i = 0; i < batchSize; i++)
+                    {
+                        textnorm[i] = textnormId;
+                    }
+                    int[] dim = new int[] { batchSize };
+                    var tensor = new DenseTensor<int>(textnorm, dim, false);
+                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
+                }
+            }
+            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
+            try
+            {
+                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
+
+                if (results != null)
+                {
+                    var resultsArray = results.ToArray();
+                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
+                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
+                    if (resultsArray.Length >= 4)
+                    {
+                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
+                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                //
+            }
+            return modelOutputEntity;
+        }
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!_disposed)
+            {
+                if (disposing)
+                {
+                    if (_modelSession != null)
+                    {
+                        _modelSession.Dispose();
+                    }
+                }
+                _disposed = true;
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(disposing: true);
+            GC.SuppressFinalize(this);
+        }
+        ~OfflineProjOfSenseVoiceSmall()
+        {
+            Dispose(_disposed);
+        }
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs
index c2d7f68..3011d13 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs
@@ -1,28 +1,17 @@
 锘�// See https://github.com/manyeyes for more information
-// Copyright (c)  2023 by manyeyes
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+// Copyright (c)  2024 by manyeyes
 using AliParaformerAsr.Model;
 using AliParaformerAsr.Utils;
+using Microsoft.Extensions.Logging;
 using Microsoft.ML.OnnxRuntime;
 using Microsoft.ML.OnnxRuntime.Tensors;
-using Microsoft.Extensions.Logging;
-using System.Text.RegularExpressions;
 using Newtonsoft.Json.Linq;
+using System.Text.RegularExpressions;
 
 // 妯″瀷鏂囦欢鍦板潃锛� https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
+// 妯″瀷鏂囦欢鍦板潃锛� https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx
 namespace AliParaformerAsr
 {
-    public enum OnnxRumtimeTypes
-    {
-        CPU = 0,
-
-        DML = 1,
-
-        CUDA = 2,
-    }
-
     /// <summary>
     /// offline recognizer package
     /// Copyright (c)  2023 by manyeyes
@@ -35,6 +24,8 @@
         private string _frontend;
         private FrontendConfEntity _frontendConfEntity;
         private string[] _tokens;
+        private IOfflineProj? _offlineProj;
+        private OfflineModel _offlineModel;
 
         /// <summary>
         /// 
@@ -48,24 +39,9 @@
         /// <param name="batchSize"></param>
         /// <param name="threadsNum"></param>
         /// <exception cref="ArgumentException"></exception>
-        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
+        public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, int threadsNum = 1, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
         {
-            var options = new SessionOptions();
-            switch(rumtimeType)
-            {
-                case OnnxRumtimeTypes.DML:
-                    options.AppendExecutionProvider_DML(deviceId);
-                    break;
-                case OnnxRumtimeTypes.CUDA:
-                    options.AppendExecutionProvider_CUDA(deviceId);
-                    break;
-                default:
-                    options.AppendExecutionProvider_CPU(deviceId);
-                    break;
-            }
-            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
-
-            _onnxSession = new InferenceSession(modelFilePath, options);
+            _offlineModel = new OfflineModel(modelFilePath, threadsNum);
             
             string[] tokenLines;
             if (tokensFilePath.EndsWith(".txt"))
@@ -86,6 +62,18 @@
             _tokens = tokenLines;
 
             OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath);
+            switch (offlineYamlEntity.model.ToLower())
+            {
+                case "paraformer":
+                    _offlineProj = new OfflineProjOfParaformer(_offlineModel);
+                    break;
+                case "sensevoicesmall":
+                    _offlineProj = new OfflineProjOfSenseVoiceSmall(_offlineModel);
+                    break;
+                default:
+                    _offlineProj = null;
+                    break;
+            }
             _wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf);
             _frontend = offlineYamlEntity.frontend;
             _frontendConfEntity = offlineYamlEntity.frontend_conf;
@@ -120,73 +108,40 @@
 
         private OfflineOutputEntity Forward(List<OfflineInputEntity> modelInputs)
         {
-            int BatchSize = modelInputs.Count;
-            float[] padSequence = PadSequence(modelInputs);
-            var inputMeta = _onnxSession.InputMetadata;
-            var container = new List<NamedOnnxValue>(); 
-            foreach (var name in inputMeta.Keys)
-            {
-                if (name == "speech")
-                {
-                    int[] dim = new int[] { BatchSize, padSequence.Length / 560 / BatchSize, 560 };//inputMeta["speech"].Dimensions[2]
-                    var tensor = new DenseTensor<float>(padSequence, dim, false);
-                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
-                }
-                if (name == "speech_lengths")
-                {
-                    int[] dim = new int[] { BatchSize };
-                    int[] speech_lengths = new int[BatchSize];
-                    for (int i = 0; i < BatchSize; i++)
-                    {
-                        speech_lengths[i] = padSequence.Length / 560 / BatchSize;
-                    }
-                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
-                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
-                }
-            }
-
-
-            IReadOnlyCollection<string> outputNames = new List<string>();
-            outputNames.Append("logits");
-            outputNames.Append("token_num");
-            IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = null;
+            OfflineOutputEntity offlineOutputEntity = new OfflineOutputEntity();            
             try
             {
-                results = _onnxSession.Run(container);
+                ModelOutputEntity modelOutputEntity = _offlineProj.ModelProj(modelInputs);
+                if (modelOutputEntity != null)
+                {
+                    offlineOutputEntity.Token_nums_length = modelOutputEntity.model_out_lens.AsEnumerable<int>().ToArray();
+                    Tensor<float> logits_tensor = modelOutputEntity.model_out;
+                    List<int[]> token_nums = new List<int[]> { };
+
+                    for (int i = 0; i < logits_tensor.Dimensions[0]; i++)
+                    {
+                        int[] item = new int[logits_tensor.Dimensions[1]];
+                        for (int j = 0; j < logits_tensor.Dimensions[1]; j++)
+                        {
+                            int token_num = 0;
+                            for (int k = 1; k < logits_tensor.Dimensions[2]; k++)
+                            {
+                                token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? token_num : k;
+                            }
+                            item[j] = (int)token_num;
+                        }
+                        token_nums.Add(item);
+                    }
+                    offlineOutputEntity.Token_nums = token_nums;
+                }
             }
             catch (Exception ex)
             {
                 //
             }
-            OfflineOutputEntity modelOutput = new OfflineOutputEntity();
-            if (results != null)
-            {
-                var resultsArray = results.ToArray();
-                modelOutput.Logits = resultsArray[0].AsEnumerable<float>().ToArray();
-                modelOutput.Token_nums_length = resultsArray[1].AsEnumerable<int>().ToArray();
-
-                Tensor<float> logits_tensor = resultsArray[0].AsTensor<float>();
-                Tensor<Int64> token_nums_tensor = resultsArray[1].AsTensor<Int64>();
-
-                List<int[]> token_nums = new List<int[]> { };
-
-                for (int i = 0; i < logits_tensor.Dimensions[0]; i++)
-                {
-                    int[] item = new int[logits_tensor.Dimensions[1]];
-                    for (int j = 0; j < logits_tensor.Dimensions[1]; j++)
-                    {                        
-                        int token_num = 0;
-                        for (int k = 1; k < logits_tensor.Dimensions[2]; k++)
-                        {
-                            token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? token_num : k;
-                        }
-                        item[j] = (int)token_num;                        
-                    }
-                    token_nums.Add(item);
-                }                
-                modelOutput.Token_nums = token_nums;
-            }
-            return modelOutput;
+            
+            
+            return offlineOutputEntity;
         }
 
         private List<string> DecodeMulti(List<int[]> token_nums)
@@ -203,9 +158,9 @@
                         break;
                     }
 
-                    string tokenChar = _tokens[token];
+                    string tokenChar = _tokens[token].Split("\t")[0];
 
-                    if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>")
+                    if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>" && tokenChar != "<unk>")
                     {                        
                         if (IsChinese(tokenChar, true))
                         {
@@ -244,48 +199,6 @@
             else
                 return false;
         }
-
-        private float[] PadSequence(List<OfflineInputEntity> modelInputs)
-        {
-            int max_speech_length = modelInputs.Max(x => x.SpeechLength);
-            int speech_length = max_speech_length * modelInputs.Count;
-            float[] speech = new float[speech_length];
-            float[,] xxx = new float[modelInputs.Count, max_speech_length];
-            for (int i = 0; i < modelInputs.Count; i++)
-            {
-                if (max_speech_length == modelInputs[i].SpeechLength)
-                {
-                    for (int j = 0; j < xxx.GetLength(1); j++)
-                    {
-#pragma warning disable CS8602 // 瑙e紩鐢ㄥ彲鑳藉嚭鐜扮┖寮曠敤銆�
-                        xxx[i, j] = modelInputs[i].Speech[j];
-#pragma warning restore CS8602 // 瑙e紩鐢ㄥ彲鑳藉嚭鐜扮┖寮曠敤銆�
-                    }
-                    continue;
-                }
-                float[] nullspeech = new float[max_speech_length - modelInputs[i].SpeechLength];
-                float[]? curr_speech = modelInputs[i].Speech;
-                float[] padspeech = new float[max_speech_length];
-                padspeech = _wavFrontend.ApplyCmvn(padspeech);
-                Array.Copy(curr_speech, 0, padspeech, 0, curr_speech.Length);
-                for (int j = 0; j < padspeech.Length; j++)
-                {
-#pragma warning disable CS8602 // 瑙e紩鐢ㄥ彲鑳藉嚭鐜扮┖寮曠敤銆�
-                    xxx[i, j] = padspeech[j];
-#pragma warning restore CS8602 // 瑙e紩鐢ㄥ彲鑳藉嚭鐜扮┖寮曠敤銆�
-                }
-
-            }
-            int s = 0;
-            for (int i = 0; i < xxx.GetLength(0); i++)
-            {
-                for (int j = 0; j < xxx.GetLength(1); j++)
-                {
-                    speech[s] = xxx[i, j];
-                    s++;
-                }
-            }
-            return speech;
-        }
+        
     }
 }
\ No newline at end of file
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs
new file mode 100644
index 0000000..e9102e8
--- /dev/null
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs
@@ -0,0 +1,55 @@
+锘縰sing AliParaformerAsr.Model;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace AliParaformerAsr.Utils
+{
+    internal static class PadHelper
+    {
+        public static float[] PadSequence(List<OfflineInputEntity> modelInputs)
+        {
+            int max_speech_length = modelInputs.Max(x => x.SpeechLength);
+            int speech_length = max_speech_length * modelInputs.Count;
+            float[] speech = new float[speech_length];
+            float[,] xxx = new float[modelInputs.Count, max_speech_length];
+            for (int i = 0; i < modelInputs.Count; i++)
+            {
+                if (max_speech_length == modelInputs[i].SpeechLength)
+                {
+                    for (int j = 0; j < xxx.GetLength(1); j++)
+                    {
+#pragma warning disable CS8602 // 解引用可能出现空引用。
+                        xxx[i, j] = modelInputs[i].Speech[j];
+#pragma warning restore CS8602 // 解引用可能出现空引用。
+                    }
+                    continue;
+                }
+                float[] nullspeech = new float[max_speech_length - modelInputs[i].SpeechLength];
+                float[]? curr_speech = modelInputs[i].Speech;
+                float[] padspeech = new float[max_speech_length];
+                Array.Copy(curr_speech, 0, padspeech, 0, curr_speech.Length);
+                for (int j = 0; j < padspeech.Length; j++)
+                {
+#pragma warning disable CS8602 // 解引用可能出现空引用。
+                    xxx[i, j] = padspeech[j];
+#pragma warning restore CS8602 // 解引用可能出现空引用。
+                }
+
+            }
+            int s = 0;
+            for (int i = 0; i < xxx.GetLength(0); i++)
+            {
+                for (int j = 0; j < xxx.GetLength(1); j++)
+                {
+                    speech[s] = xxx[i, j];
+                    s++;
+                }
+            }
+            speech = speech.Select(x => x == 0 ? -23.025850929940457F * 32768 : x).ToArray();
+            return speech;
+        }
+    }
+}
diff --git a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs
index 5c1bec3..eb3a0b8 100644
--- a/runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs
+++ b/runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs
@@ -30,7 +30,8 @@
             _fbank_beg_idx = 0;
             _onlineFbank = new OnlineFbank(
                 dither: _frontendConfEntity.dither,
-                snip_edges: false,
+                snip_edges: _frontendConfEntity.snip_edges,
+                window_type: _frontendConfEntity.window,
                 sample_rate: _frontendConfEntity.fs,
                 num_bins: _frontendConfEntity.n_mels
                 );
diff --git a/runtime/csharp/AliParaformerAsr/README.md b/runtime/csharp/AliParaformerAsr/README.md
index 887180d..4b2ef5b 100644
--- a/runtime/csharp/AliParaformerAsr/README.md
+++ b/runtime/csharp/AliParaformerAsr/README.md
@@ -1,4 +1,10 @@
 # AliParaformerAsr
+##### 鏀寔妯″瀷
+## paraformer-large offline onnx模型下载
+https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
+## SenseVoiceSmall offline onnx模型下载
+https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx
+
 ##### 绠�浠嬶細
 椤圭洰涓娇鐢ㄧ殑Asr妯″瀷鏄樋閲屽反宸磋揪鎽╅櫌鎻愪緵鐨凱araformer-large ASR妯″瀷銆�
 **椤圭洰鍩轰簬Net 6.0锛屼娇鐢–#缂栧啓锛岃皟鐢∕icrosoft.ML.OnnxRuntime瀵筼nnx妯″瀷杩涜瑙g爜锛屾敮鎸佽法骞冲彴缂栬瘧銆傞」鐩互搴撶殑褰㈠紡杩涜璋冪敤锛岄儴缃查潪甯告柟渚裤��**
@@ -25,9 +31,6 @@
 
 ##### ASR甯哥敤鍙傛暟锛堝弬鑰冿細asr.yaml鏂囦欢锛夛細
 鐢ㄤ簬瑙g爜鐨刟sr.yaml閰嶇疆鍙傛暟锛屽彇鑷畼鏂规ā鍨嬮厤缃甤onfig.yaml鍘熸枃浠躲�備究浜庤窡杩涘拰鍗囩骇銆�
-
-## paraformer-large offline onnx妯″瀷涓嬭浇
-https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
 
 ## 绂荤嚎锛堥潪娴佸紡锛夋ā鍨嬭皟鐢ㄦ柟娉曪細
 

--
Gitblit v1.9.1