runtime/csharp/AliParaformerAsr/AliParaformerAsr/AliParaformerAsr.csproj
@@ -1,16 +1,16 @@ <Project Sdk="Microsoft.NET.Sdk"> <PropertyGroup> <TargetFramework>net6.0</TargetFramework> <TargetFramework>net8.0</TargetFramework> <ImplicitUsings>enable</ImplicitUsings> <Nullable>enable</Nullable> </PropertyGroup> <ItemGroup> <PackageReference Include="KaldiNativeFbankSharp" Version="1.0.8" /> <PackageReference Include="KaldiNativeFbankSharp" Version="1.1.2" /> <PackageReference Include="Microsoft.Extensions.Logging" Version="7.0.0" /> <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.17.3" /> <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.17.3" /> <PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.18.1" /> <PackageReference Include="Microsoft.ML.OnnxRuntime.Managed" Version="1.18.1" /> <PackageReference Include="Newtonsoft.Json" Version="13.0.3" /> <PackageReference Include="YamlDotNet" Version="13.1.1" /> </ItemGroup> runtime/csharp/AliParaformerAsr/AliParaformerAsr/IOfflineProj.cs
New file @@ -0,0 +1,43 @@
// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;

namespace AliParaformerAsr
{
    /// <summary>
    /// Contract for an offline model "projection": a thin adapter that owns an
    /// ONNX <see cref="InferenceSession"/> and runs one forward pass over a batch
    /// of feature inputs. Implemented by OfflineProjOfParaformer and
    /// OfflineProjOfSenseVoiceSmall; OfflineRecognizer selects one at startup
    /// based on the "model" key of the yaml config.
    /// </summary>
    internal interface IOfflineProj
    {
        // The loaded ONNX session the projection runs on (shared with OfflineModel).
        InferenceSession ModelSession { get; set; }
        // Special token ids used during decoding.
        int Blank_id { get; set; }
        int Sos_eos_id { get; set; }
        int Unk_id { get; set; }
        // Audio/feature geometry expected by the model (e.g. 16000 Hz, 80 mel bins).
        int SampleRate { get; set; }
        int FeatureDim { get; set; }
        // Runs inference on a batch and returns logits / output lengths / optional CIF peaks.
        // NOTE(review): these members are declared `internal`; non-public interface
        // members normally require explicit implementation — confirm implementers
        // bind to them as intended.
        internal ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs);
        // Releases the underlying session resources.
        internal void Dispose();
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/FrontendConfEntity.cs
@@ -1,11 +1,5 @@ // See https://github.com/manyeyes for more information // Copyright (c) 2023 by manyeyes using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace AliParaformerAsr.Model { public class FrontendConfEntity @@ -15,9 +9,10 @@ private int _n_mels = 80; private int _frame_length = 25; private int _frame_shift = 10; private float _dither = 0.0F; private float _dither = 1.0F; private int _lfr_m = 7; private int _lfr_n = 6; private bool _snip_edges = false; public int fs { get => _fs; set => _fs = value; } public string window { get => _window; set => _window = value; } @@ -27,5 +22,6 @@ public float dither { get => _dither; set => _dither = value; } public int lfr_m { get => _lfr_m; set => _lfr_m = value; } public int lfr_n { get => _lfr_n; set => _lfr_n = value; } public bool snip_edges { get => _snip_edges; set => _snip_edges = value; } } } runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelConfEntity.cs
@@ -1,11 +1,5 @@ // See https://github.com/manyeyes for more information // Copyright (c) 2023 by manyeyes using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace AliParaformerAsr.Model { public class ModelConfEntity @@ -16,6 +10,9 @@ private float _predictor_weight = 1.0F; private int _predictor_bias = 1; private float _sampling_ratio = 0.75F; private int _sos = 1; private int _eos = 2; private int _ignore_id = -1; public float ctc_weight { get => _ctc_weight; set => _ctc_weight = value; } public float lsm_weight { get => _lsm_weight; set => _lsm_weight = value; } @@ -23,5 +20,8 @@ public float predictor_weight { get => _predictor_weight; set => _predictor_weight = value; } public int predictor_bias { get => _predictor_bias; set => _predictor_bias = value; } public float sampling_ratio { get => _sampling_ratio; set => _sampling_ratio = value; } public int sos { get => _sos; set => _sos = value; } public int eos { get => _eos; set => _eos = value; } public int ignore_id { get => _ignore_id; set => _ignore_id = value; } } } runtime/csharp/AliParaformerAsr/AliParaformerAsr/Model/ModelOutputEntity.cs
New file @@ -0,0 +1,17 @@
// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using Microsoft.ML.OnnxRuntime.Tensors;

namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Carrier for the raw outputs of one offline inference pass:
    /// the logits tensor, the per-utterance output lengths, and the
    /// optional CIF peak tensor (present only for models that emit it).
    /// </summary>
    internal class ModelOutputEntity
    {
        // Logits tensor produced by the model (null until a run completes).
        public Tensor<float>? model_out { get; set; }
        // Output token-sequence length per batch item.
        public int[]? model_out_lens { get; set; }
        // CIF predictor peaks, when the model exposes a fourth output.
        public Tensor<float>? cif_peak_tensor { get; set; }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineModel.cs
New file @@ -0,0 +1,72 @@
// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using Microsoft.ML.OnnxRuntime;

namespace AliParaformerAsr
{
    /// <summary>
    /// Execution-provider choices for ONNX Runtime.
    /// ("Rumtime" is a typo, kept because the enum is public API.)
    /// </summary>
    public enum OnnxRumtimeTypes
    {
        CPU = 0,
        DML = 1,
        CUDA = 2,
    }

    /// <summary>
    /// Owns the ONNX <see cref="InferenceSession"/> plus the token-id and
    /// feature constants shared by the offline projections.
    /// </summary>
    public class OfflineModel
    {
        private InferenceSession _modelSession;
        private int _blank_id = 0;
        // Renamed from "sos_eos_id" for consistency with the other private fields.
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;

        /// <summary>
        /// Loads the model from <paramref name="modelFilePath"/> on the requested
        /// execution provider.
        /// </summary>
        /// <param name="modelFilePath">Path to the .onnx file.</param>
        /// <param name="threadsNum">Thread count applied to the session.</param>
        /// <param name="rumtimeType">Execution provider (CPU / DML / CUDA).</param>
        /// <param name="deviceId">GPU device id for DML/CUDA.</param>
        public OfflineModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            _modelSession = initModel(modelFilePath, threadsNum, rumtimeType, deviceId);
        }

        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }

        /// <summary>
        /// Builds the session options for the chosen provider and opens the session.
        /// </summary>
        public InferenceSession initModel(string modelFilePath, int threadsNum = 2, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0)
        {
            var options = new SessionOptions();
            switch (rumtimeType)
            {
                case OnnxRumtimeTypes.DML:
                    options.AppendExecutionProvider_DML(deviceId);
                    break;
                case OnnxRumtimeTypes.CUDA:
                    options.AppendExecutionProvider_CUDA(deviceId);
                    break;
                default:
                    // NOTE(review): AppendExecutionProvider_CPU's parameter is the
                    // useArena flag, not a device id — confirm intent.
                    options.AppendExecutionProvider_CPU(deviceId);
                    break;
            }
            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            // Fix: intra-op threads control parallelism inside each operator and are
            // the dominant knob for ASR inference; previously only InterOpNumThreads
            // was set, leaving threadsNum mostly ineffective.
            options.IntraOpNumThreads = threadsNum;
            options.InterOpNumThreads = threadsNum;
            InferenceSession onnxSession = new InferenceSession(modelFilePath, options);
            return onnxSession;
        }

        /// <summary>Releases the session when <paramref name="disposing"/> is true.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (_modelSession != null)
                {
                    _modelSession.Dispose();
                }
            }
        }

        // Internal dispose entry point used by the recognizer.
        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfParaformer.cs
New file @@ -0,0 +1,113 @@
// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using AliParaformerAsr.Utils;

namespace AliParaformerAsr
{
    /// <summary>
    /// Projection for the Paraformer offline model: builds the "speech" /
    /// "speech_lengths" inputs from a padded batch and runs one forward pass.
    /// </summary>
    internal class OfflineProjOfParaformer : IOfflineProj, IDisposable
    {
        // To detect redundant calls
        private bool _disposed;

        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;

        // Flattened feature width per frame: 80 mel bins × LFR m=7 = 560.
        // NOTE(review): assumes the frontend's lfr_m stays 7 — confirm against asr.yaml.
        private const int LfrFeatureDim = 560;

        /// <summary>Copies session and decoding constants from the shared model.</summary>
        public OfflineProjOfParaformer(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }

        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }

        /// <summary>
        /// Runs inference on a batch. Returns an entity whose fields stay null
        /// if the run fails (best-effort, matching the original behavior).
        /// </summary>
        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            var inputMeta = _modelSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            // Frames per utterance after padding; same value feeds both inputs.
            int frameCount = padSequence.Length / LfrFeatureDim / batchSize;
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, frameCount, LfrFeatureDim };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { batchSize };
                    int[] speech_lengths = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        speech_lengths[i] = frameCount;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }
            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                // Results are intentionally NOT disposed here: the returned tensors
                // reference their backing buffers and are consumed by the caller.
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    if (resultsArray.Length >= 4)
                    {
                        // Fourth output (when present) holds the CIF predictor peaks.
                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
                    }
                }
            }
            catch (Exception)
            {
                // Swallow: a failed run yields an empty output entity (deliberate
                // best-effort contract); callers must null-check the fields.
            }
            return modelOutputEntity;
        }

        /// <summary>Core dispose; releases the session on the managed path.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    // NOTE(review): the session is created and also disposed by
                    // OfflineModel — double-dispose is possible; confirm ownership.
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        ~OfflineProjOfParaformer()
        {
            // Fix: the original passed _disposed as the `disposing` argument;
            // a finalizer must always take the unmanaged-only path.
            Dispose(disposing: false);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs
New file @@ -0,0 +1,156 @@
// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using AliParaformerAsr.Utils;

namespace AliParaformerAsr
{
    /// <summary>
    /// Projection for the SenseVoiceSmall offline model. Besides the Paraformer
    /// inputs ("speech", "speech_lengths") it also feeds the model's "language"
    /// and "textnorm" control-token inputs.
    /// </summary>
    internal class OfflineProjOfSenseVoiceSmall : IOfflineProj, IDisposable
    {
        // To detect redundant calls
        private bool _disposed;

        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        private bool _use_itn = false;
        private string _textnorm = "woitn";
        // Language name -> language control-token id.
        private Dictionary<string, int> _lidDict = new Dictionary<string, int>() { { "auto", 0 }, { "zh", 3 }, { "en", 4 }, { "yue", 7 }, { "ja", 11 }, { "ko", 12 }, { "nospeech", 13 } };
        // Vocabulary token id -> language control-token id.
        private Dictionary<int, int> _lidIntDict = new Dictionary<int, int>() { { 24884, 3 }, { 24885, 4 }, { 24888, 7 }, { 24892, 11 }, { 24896, 12 }, { 24992, 13 } };
        // Text-normalization mode -> control-token id ("withitn" = apply inverse text norm).
        private Dictionary<string, int> _textnormDict = new Dictionary<string, int>() { { "withitn", 14 }, { "woitn", 15 } };
        private Dictionary<int, int> _textnormIntDict = new Dictionary<int, int>() { { 25016, 14 }, { 25017, 15 } };

        // Flattened feature width per frame: 80 mel bins × LFR m=7 = 560.
        // NOTE(review): assumes the frontend's lfr_m stays 7 — confirm against asr.yaml.
        private const int LfrFeatureDim = 560;

        /// <summary>Copies session and decoding constants from the shared model.</summary>
        public OfflineProjOfSenseVoiceSmall(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }

        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }

        /// <summary>
        /// Runs inference on a batch. Returns an entity whose fields stay null
        /// if the run fails (best-effort, matching the original behavior).
        /// </summary>
        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            // Fix: in the shown source the declaration was commented out
            // (`// string languageValue = "ja";`) while `languageValue` was still
            // used below, which does not compile. Reconstructed with "auto"
            // (language auto-detect) as the default — TODO confirm against upstream.
            string languageValue = "auto";
            int languageId;
            // Single lookup instead of ContainsKey + GetValueOrDefault.
            if (!_lidDict.TryGetValue(languageValue, out languageId))
            {
                languageId = 0;
            }
            string textnormValue = "withitn";
            int textnormId;
            if (!_textnormDict.TryGetValue(textnormValue, out textnormId))
            {
                textnormId = 15;
            }
            var inputMeta = _modelSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            int frameCount = padSequence.Length / LfrFeatureDim / batchSize;
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, frameCount, LfrFeatureDim };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { batchSize };
                    int[] speech_lengths = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        speech_lengths[i] = frameCount;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
                if (name == "language")
                {
                    // One language control token per batch item.
                    int[] language = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        language[i] = languageId;
                    }
                    int[] dim = new int[] { batchSize };
                    var tensor = new DenseTensor<int>(language, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
                if (name == "textnorm")
                {
                    // One text-normalization control token per batch item.
                    int[] textnorm = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        textnorm[i] = textnormId;
                    }
                    int[] dim = new int[] { batchSize };
                    var tensor = new DenseTensor<int>(textnorm, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }
            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                // Results are intentionally NOT disposed here: the returned tensors
                // reference their backing buffers and are consumed by the caller.
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    if (resultsArray.Length >= 4)
                    {
                        // Fourth output (when present) holds the CIF predictor peaks.
                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
                    }
                }
            }
            catch (Exception)
            {
                // Swallow: a failed run yields an empty output entity (deliberate
                // best-effort contract); callers must null-check the fields.
            }
            return modelOutputEntity;
        }

        /// <summary>Core dispose; releases the session on the managed path.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    // NOTE(review): the session is created and also disposed by
                    // OfflineModel — double-dispose is possible; confirm ownership.
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        ~OfflineProjOfSenseVoiceSmall()
        {
            // Fix: the original passed _disposed as the `disposing` argument;
            // a finalizer must always take the unmanaged-only path.
            Dispose(disposing: false);
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/OfflineRecognizer.cs
@@ -1,28 +1,17 @@ // See https://github.com/manyeyes for more information // Copyright (c) 2023 by manyeyes using System.Linq; using System.Text; using System.Threading.Tasks; // Copyright (c) 2024 by manyeyes using AliParaformerAsr.Model; using AliParaformerAsr.Utils; using Microsoft.Extensions.Logging; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; using Microsoft.Extensions.Logging; using System.Text.RegularExpressions; using Newtonsoft.Json.Linq; using System.Text.RegularExpressions; // 模型文件地址: https://modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx // 模型文件地址: https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx namespace AliParaformerAsr { public enum OnnxRumtimeTypes { CPU = 0, DML = 1, CUDA = 2, } /// <summary> /// offline recognizer package /// Copyright (c) 2023 by manyeyes @@ -35,6 +24,8 @@ private string _frontend; private FrontendConfEntity _frontendConfEntity; private string[] _tokens; private IOfflineProj? 
_offlineProj; private OfflineModel _offlineModel; /// <summary> /// @@ -48,24 +39,9 @@ /// <param name="batchSize"></param> /// <param name="threadsNum"></param> /// <exception cref="ArgumentException"></exception> public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0) public OfflineRecognizer(string modelFilePath, string configFilePath, string mvnFilePath, string tokensFilePath, int threadsNum = 1, OnnxRumtimeTypes rumtimeType = OnnxRumtimeTypes.CPU, int deviceId = 0) { var options = new SessionOptions(); switch(rumtimeType) { case OnnxRumtimeTypes.DML: options.AppendExecutionProvider_DML(deviceId); break; case OnnxRumtimeTypes.CUDA: options.AppendExecutionProvider_CUDA(deviceId); break; default: options.AppendExecutionProvider_CPU(deviceId); break; } //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO; _onnxSession = new InferenceSession(modelFilePath, options); _offlineModel = new OfflineModel(modelFilePath, threadsNum); string[] tokenLines; if (tokensFilePath.EndsWith(".txt")) @@ -86,6 +62,18 @@ _tokens = tokenLines; OfflineYamlEntity offlineYamlEntity = YamlHelper.ReadYaml<OfflineYamlEntity>(configFilePath); switch (offlineYamlEntity.model.ToLower()) { case "paraformer": _offlineProj = new OfflineProjOfParaformer(_offlineModel); break; case "sensevoicesmall": _offlineProj = new OfflineProjOfSenseVoiceSmall(_offlineModel); break; default: _offlineProj = null; break; } _wavFrontend = new WavFrontend(mvnFilePath, offlineYamlEntity.frontend_conf); _frontend = offlineYamlEntity.frontend; _frontendConfEntity = offlineYamlEntity.frontend_conf; @@ -120,73 +108,40 @@ private OfflineOutputEntity Forward(List<OfflineInputEntity> modelInputs) { int BatchSize = modelInputs.Count; float[] padSequence = PadSequence(modelInputs); var inputMeta = _onnxSession.InputMetadata; var container = new List<NamedOnnxValue>(); foreach (var name 
in inputMeta.Keys) { if (name == "speech") { int[] dim = new int[] { BatchSize, padSequence.Length / 560 / BatchSize, 560 };//inputMeta["speech"].Dimensions[2] var tensor = new DenseTensor<float>(padSequence, dim, false); container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor)); } if (name == "speech_lengths") { int[] dim = new int[] { BatchSize }; int[] speech_lengths = new int[BatchSize]; for (int i = 0; i < BatchSize; i++) { speech_lengths[i] = padSequence.Length / 560 / BatchSize; } var tensor = new DenseTensor<int>(speech_lengths, dim, false); container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor)); } } IReadOnlyCollection<string> outputNames = new List<string>(); outputNames.Append("logits"); outputNames.Append("token_num"); IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = null; OfflineOutputEntity offlineOutputEntity = new OfflineOutputEntity(); try { results = _onnxSession.Run(container); ModelOutputEntity modelOutputEntity = _offlineProj.ModelProj(modelInputs); if (modelOutputEntity != null) { offlineOutputEntity.Token_nums_length = modelOutputEntity.model_out_lens.AsEnumerable<int>().ToArray(); Tensor<float> logits_tensor = modelOutputEntity.model_out; List<int[]> token_nums = new List<int[]> { }; for (int i = 0; i < logits_tensor.Dimensions[0]; i++) { int[] item = new int[logits_tensor.Dimensions[1]]; for (int j = 0; j < logits_tensor.Dimensions[1]; j++) { int token_num = 0; for (int k = 1; k < logits_tensor.Dimensions[2]; k++) { token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? 
token_num : k; } item[j] = (int)token_num; } token_nums.Add(item); } offlineOutputEntity.Token_nums = token_nums; } } catch (Exception ex) { // } OfflineOutputEntity modelOutput = new OfflineOutputEntity(); if (results != null) { var resultsArray = results.ToArray(); modelOutput.Logits = resultsArray[0].AsEnumerable<float>().ToArray(); modelOutput.Token_nums_length = resultsArray[1].AsEnumerable<int>().ToArray(); Tensor<float> logits_tensor = resultsArray[0].AsTensor<float>(); Tensor<Int64> token_nums_tensor = resultsArray[1].AsTensor<Int64>(); List<int[]> token_nums = new List<int[]> { }; for (int i = 0; i < logits_tensor.Dimensions[0]; i++) { int[] item = new int[logits_tensor.Dimensions[1]]; for (int j = 0; j < logits_tensor.Dimensions[1]; j++) { int token_num = 0; for (int k = 1; k < logits_tensor.Dimensions[2]; k++) { token_num = logits_tensor[i, j, token_num] > logits_tensor[i, j, k] ? token_num : k; } item[j] = (int)token_num; } token_nums.Add(item); } modelOutput.Token_nums = token_nums; } return modelOutput; return offlineOutputEntity; } private List<string> DecodeMulti(List<int[]> token_nums) @@ -203,9 +158,9 @@ break; } string tokenChar = _tokens[token]; string tokenChar = _tokens[token].Split("\t")[0]; if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>") if (tokenChar != "</s>" && tokenChar != "<s>" && tokenChar != "<blank>" && tokenChar != "<unk>") { if (IsChinese(tokenChar, true)) { @@ -244,48 +199,6 @@ else return false; } private float[] PadSequence(List<OfflineInputEntity> modelInputs) { int max_speech_length = modelInputs.Max(x => x.SpeechLength); int speech_length = max_speech_length * modelInputs.Count; float[] speech = new float[speech_length]; float[,] xxx = new float[modelInputs.Count, max_speech_length]; for (int i = 0; i < modelInputs.Count; i++) { if (max_speech_length == modelInputs[i].SpeechLength) { for (int j = 0; j < xxx.GetLength(1); j++) { #pragma warning disable CS8602 // 解引用可能出现空引用。 xxx[i, j] = 
modelInputs[i].Speech[j]; #pragma warning restore CS8602 // 解引用可能出现空引用。 } continue; } float[] nullspeech = new float[max_speech_length - modelInputs[i].SpeechLength]; float[]? curr_speech = modelInputs[i].Speech; float[] padspeech = new float[max_speech_length]; padspeech = _wavFrontend.ApplyCmvn(padspeech); Array.Copy(curr_speech, 0, padspeech, 0, curr_speech.Length); for (int j = 0; j < padspeech.Length; j++) { #pragma warning disable CS8602 // 解引用可能出现空引用。 xxx[i, j] = padspeech[j]; #pragma warning restore CS8602 // 解引用可能出现空引用。 } } int s = 0; for (int i = 0; i < xxx.GetLength(0); i++) { for (int j = 0; j < xxx.GetLength(1); j++) { speech[s] = xxx[i, j]; s++; } } return speech; } } } runtime/csharp/AliParaformerAsr/AliParaformerAsr/Utils/PadHelper.cs
New file @@ -0,0 +1,55 @@
using AliParaformerAsr.Model;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AliParaformerAsr.Utils
{
    /// <summary>
    /// Batch-padding helper for offline inference inputs.
    /// </summary>
    internal static class PadHelper
    {
        /// <summary>
        /// Flattens a batch of feature sequences into one row-major float array of
        /// shape [batch, maxSpeechLength], zero-padding shorter sequences and then
        /// replacing every zero with the log-energy floor value.
        /// </summary>
        /// <param name="modelInputs">Batch items; each carries a Speech buffer and its length.</param>
        /// <returns>Flat padded buffer of size batch * maxSpeechLength.</returns>
        public static float[] PadSequence(List<OfflineInputEntity> modelInputs)
        {
            int max_speech_length = modelInputs.Max(x => x.SpeechLength);
            // Fix: the original built an intermediate [batch, max] 2-D buffer and
            // then copied it element-by-element into the flat array, and allocated
            // an unused `nullspeech` scratch array; here each row is copied once,
            // directly into its slice of the output. Padding slots start at 0.
            float[] speech = new float[max_speech_length * modelInputs.Count];
            for (int i = 0; i < modelInputs.Count; i++)
            {
                float[]? curr_speech = modelInputs[i].Speech;
                if (curr_speech == null)
                {
                    // Robustness: a null buffer becomes an all-padding row instead
                    // of the original NullReferenceException.
                    continue;
                }
                int copyLength = Math.Min(curr_speech.Length, max_speech_length);
                Array.Copy(curr_speech, 0, speech, i * max_speech_length, copyLength);
            }
            // Replace zeros (padding and genuine zero features alike) with the
            // log floor: ln(1e-10) * 32768. NOTE(review): this also rewrites real
            // zero-valued feature cells — presumably intentional for this frontend;
            // confirm upstream.
            for (int s = 0; s < speech.Length; s++)
            {
                if (speech[s] == 0)
                {
                    speech[s] = -23.025850929940457F * 32768;
                }
            }
            return speech;
        }
    }
}
runtime/csharp/AliParaformerAsr/AliParaformerAsr/WavFrontend.cs
@@ -30,7 +30,8 @@ _fbank_beg_idx = 0; _onlineFbank = new OnlineFbank( dither: _frontendConfEntity.dither, snip_edges: false, snip_edges: _frontendConfEntity.snip_edges, window_type: _frontendConfEntity.window, sample_rate: _frontendConfEntity.fs, num_bins: _frontendConfEntity.n_mels ); runtime/csharp/AliParaformerAsr/README.md
@@ -1,4 +1,10 @@ # AliParaformerAsr ##### 支持模型 ## paraformer-large offline onnx模型下载 https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx ## SenseVoiceSmall offline onnx模型下载 https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx ##### 简介: 项目中使用的Asr模型是阿里巴巴达摩院提供的Paraformer-large ASR模型。 **项目基于Net 8.0，使用C#编写，调用Microsoft.ML.OnnxRuntime对onnx模型进行解码，支持跨平台编译。项目以库的形式进行调用，部署非常方便。** @@ -25,9 +31,6 @@ ##### ASR常用参数(参考:asr.yaml文件): 用于解码的asr.yaml配置参数，取自官方模型配置config.yaml原文件。便于跟进和升级。 ## paraformer-large offline onnx模型下载 https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx ## 离线(非流式)模型调用方法: