// See https://github.com/manyeyes for more information
// Copyright (c) 2024 by manyeyes
using AliParaformerAsr.Model;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using AliParaformerAsr.Utils;

namespace AliParaformerAsr
{
    /// <summary>
    /// Offline (non-streaming) Paraformer projection: pads a batch of feature
    /// sequences, runs them through the ONNX <see cref="InferenceSession"/> and
    /// collects the model outputs into a <c>ModelOutputEntity</c>.
    /// Owns (and disposes) the underlying inference session.
    /// </summary>
    internal class OfflineProjOfParaformer : IOfflineProj, IDisposable
    {
        // Width of one stacked feature frame fed to the "speech" input.
        // NOTE(review): 560 presumably equals featureDim(80) * LFR stacking(7)
        // — confirm against the feature extractor that produces modelInputs.
        private const int FeatureStride = 560;

        // To detect redundant Dispose calls
        private bool _disposed;

        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;

        /// <summary>Copies session handle and token/feature configuration from the loaded model.</summary>
        public OfflineProjOfParaformer(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }

        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }

        /// <summary>
        /// Runs one forward pass over a padded batch.
        /// </summary>
        /// <param name="modelInputs">One flattened feature sequence per utterance;
        /// all are padded to a common length by <c>PadHelper.PadSequence</c>.</param>
        /// <returns>
        /// The model outputs: <c>model_out</c> (logits tensor), <c>model_out_lens</c>
        /// (per-item output lengths) and, when the exported model provides a 4th
        /// output, the CIF peak tensor. On inference failure an empty entity is
        /// returned (best-effort; see catch below).
        /// </returns>
        public ModelOutputEntity ModelProj(List<float[]> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            // After padding every item has the same frame count, so one value
            // serves both the "speech" shape and all "speech_lengths" entries.
            int framesPerItem = padSequence.Length / FeatureStride / batchSize;

            var inputMeta = _modelSession.InputMetadata;
            var container = new List<NamedOnnxValue>();
            foreach (var name in inputMeta.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, framesPerItem, FeatureStride };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    int[] dim = new int[] { batchSize };
                    int[] speech_lengths = new int[batchSize];
                    for (int i = 0; i < batchSize; i++)
                    {
                        speech_lengths[i] = framesPerItem;
                    }
                    var tensor = new DenseTensor<int>(speech_lengths, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor<int>(name, tensor));
                }
            }

            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    // Optional 4th output (index 3): CIF peak tensor, only present
                    // in exports that emit alignment information.
                    if (resultsArray.Length >= 4)
                    {
                        Tensor<float> cif_peak_tensor = resultsArray[3].AsTensor<float>();
                        modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
                    }
                }
            }
            catch (Exception)
            {
                // Best-effort: a failed inference yields an empty output entity
                // instead of propagating. NOTE(review): consider logging the
                // exception so failures are not completely silent.
            }
            return modelOutputEntity;
        }

        /// <summary>Standard dispose pattern; managed state is released only when
        /// <paramref name="disposing"/> is true (i.e. not from the finalizer).</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        ~OfflineProjOfParaformer()
        {
            // BUG FIX: the original passed the _disposed state flag as the
            // 'disposing' argument. A finalizer must always call Dispose(false)
            // so that managed objects (the session) are never touched here.
            Dispose(disposing: false);
        }
    }
}