From b7060884fa4b8b85f79462644a5c99062d223da0 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Tue, 25 Jun 2024 17:38:04 +0800
Subject: [PATCH] Merge Dev tclas (#1847)

---
 runtime/onnxruntime/src/paraformer-torch.cpp |  211 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 193 insertions(+), 18 deletions(-)

diff --git a/runtime/onnxruntime/src/paraformer-torch.cpp b/runtime/onnxruntime/src/paraformer-torch.cpp
index a5f7194..466d80a 100644
--- a/runtime/onnxruntime/src/paraformer-torch.cpp
+++ b/runtime/onnxruntime/src/paraformer-torch.cpp
@@ -50,6 +50,11 @@
         torch::jit::script::Module model = torch::jit::load(am_model, device);
         model_ = std::make_shared<TorchModule>(std::move(model)); 
         LOG(INFO) << "Successfully load model from " << am_model;
+        torch::NoGradGuard no_grad;
+        model_->eval();
+        torch::jit::setGraphExecutorOptimize(false);
+        torch::jit::FusionStrategy static0 = {{torch::jit::FusionBehavior::STATIC, 0}};
+        torch::jit::setFusionStrategy(static0);
     } catch (std::exception const &e) {
         LOG(ERROR) << "Error when load am model: " << am_model << e.what();
         exit(-1);
@@ -100,6 +105,27 @@
 
 void ParaformerTorch::InitHwCompiler(const std::string &hw_model, int thread_num) {
     // TODO
+    torch::DeviceType device = at::kCPU;  // stays on CPU unless the USE_GPU build finds CUDA below
+    #ifdef USE_GPU
+    if (!torch::cuda::is_available()) {
+        LOG(ERROR) << "CUDA is not available! Please check your GPU settings";  // say why before exiting
+        exit(-1);
+    } else {
+        // LOG(INFO) << "CUDA is available, running on GPU";
+        device = at::kCUDA;
+    }
+    #endif
+    // Load the TorchScript hotword model onto `device`; any failure to load it is fatal.
+    try {
+        torch::jit::script::Module model = torch::jit::load(hw_model, device);
+        hw_model_ = std::make_shared<TorchModule>(std::move(model));
+        LOG(INFO) << "Successfully load model from " << hw_model;
+        torch::NoGradGuard no_grad;  // scoped object: it is destroyed at the end of this try block
+        hw_model_->eval();
+    } catch (std::exception const &e) {
+        LOG(ERROR) << "Error when load hw model: " << hw_model << ": " << e.what();
+        exit(-1);
+    }
     use_hotword = true;
 }
 
@@ -111,15 +137,19 @@
 {
     if(vocab){
         delete vocab;
+        vocab = nullptr;
     }
     if(lm_vocab){
         delete lm_vocab;
+        lm_vocab = nullptr;
     }
     if(seg_dict){
         delete seg_dict;
+        seg_dict = nullptr;
     }
     if(phone_set_){
         delete phone_set_;
+        phone_set_ = nullptr;
     }
 }
 
@@ -267,6 +297,9 @@
 
 std::vector<std::string> ParaformerTorch::Forward(float** din, int* len, bool input_finished, const std::vector<std::vector<float>> &hw_emb, void* decoder_handle, int batch_in)
 {
+    vector<std::string> results;
+    string result="";
+
     WfstDecoder* wfst_decoder = (WfstDecoder*)decoder_handle;
     int32_t in_feat_dim = fbank_opts_.mel_opts.num_bins;
     int32_t feature_dim = lfr_m*in_feat_dim;
@@ -295,8 +328,13 @@
         feats_batch.emplace_back(flattened);
     }
 
-    torch::NoGradGuard no_grad;
-    model_->eval();
+    if(max_frames == 0){
+        for(int index=0; index<batch_in; index++){
+            results.push_back(result);
+        }
+        return results;
+    }
+
     // padding
     std::vector<float> all_feats(batch_in * max_frames * feature_dim);
     for(int index=0; index<batch_in; index++){
@@ -317,8 +355,52 @@
     #endif
     std::vector<torch::jit::IValue> inputs = {feats, feat_lens};
 
-    vector<std::string> results;
+    std::vector<float> batch_embedding;
+    std::vector<float> embedding;
+    try{
+        if (use_hotword) {
+            if(hw_emb.size()<=0){
+                LOG(ERROR) << "hw_emb is null";
+                for(int index=0; index<batch_in; index++){
+                    results.push_back(result);
+                }
+                return results;
+            }
+            
+            embedding.reserve(hw_emb.size() * hw_emb[0].size());
+            for (auto item : hw_emb) {
+                embedding.insert(embedding.end(), item.begin(), item.end());
+            }
+            batch_embedding.reserve(batch_in * embedding.size());
+            for (size_t index = 0; index < batch_in; ++index) {
+                batch_embedding.insert(batch_embedding.end(), embedding.begin(), embedding.end());
+            }
+
+            torch::Tensor tensor_hw_emb =
+                torch::from_blob(batch_embedding.data(),
+                        {batch_in, static_cast<int64_t>(hw_emb.size()), static_cast<int64_t>(hw_emb[0].size())}, torch::kFloat).contiguous();
+            #ifdef USE_GPU
+            tensor_hw_emb = tensor_hw_emb.to(at::kCUDA);
+            #endif
+            inputs.emplace_back(tensor_hw_emb);
+        }
+    }catch (std::exception const &e)
+    {
+        LOG(ERROR)<<e.what();
+        for(int index=0; index<batch_in; index++){
+            results.push_back(result);
+        }
+        return results;
+    }
+
     try {
+        if(inputs.size() == 0){
+            LOG(ERROR) << "inputs of forward is null";
+            for(int index=0; index<batch_in; index++){
+                results.push_back(result);
+            }
+            return results;
+        }
         auto outputs = model_->forward(inputs).toTuple()->elements();
         torch::Tensor am_scores;
         torch::Tensor valid_token_lens;
@@ -329,28 +411,31 @@
         am_scores = outputs[0].toTensor();
         valid_token_lens = outputs[1].toTensor();
         #endif
+
+        torch::Tensor us_alphas_tensor;
+        torch::Tensor us_peaks_tensor;
+        if(outputs.size() == 4){
+            #ifdef USE_GPU
+            us_alphas_tensor = outputs[2].toTensor().to(at::kCPU);
+            us_peaks_tensor = outputs[3].toTensor().to(at::kCPU);
+            #else
+            us_alphas_tensor = outputs[2].toTensor();
+            us_peaks_tensor = outputs[3].toTensor();
+            #endif
+        }
+
         // timestamp
         for(int index=0; index<batch_in; index++){
-            string result="";
+            result="";
             if(outputs.size() == 4){
-                torch::Tensor us_alphas_tensor;
-                torch::Tensor us_peaks_tensor;
-                #ifdef USE_GPU
-                us_alphas_tensor = outputs[2].toTensor().to(at::kCPU);
-                us_peaks_tensor = outputs[3].toTensor().to(at::kCPU);
-                #else
-                us_alphas_tensor = outputs[2].toTensor();
-                us_peaks_tensor = outputs[3].toTensor();
-                #endif
-
                 float* us_alphas_data = us_alphas_tensor[index].data_ptr<float>();
-                std::vector<float> us_alphas(paraformer_length[index]);
+                std::vector<float> us_alphas(paraformer_length[index]*3);
                 for (int i = 0; i < us_alphas.size(); i++) {
                     us_alphas[i] = us_alphas_data[i];
                 }
 
                 float* us_peaks_data = us_peaks_tensor[index].data_ptr<float>();
-                std::vector<float> us_peaks(paraformer_length[index]);
+                std::vector<float> us_peaks(paraformer_length[index]*3);
                 for (int i = 0; i < us_peaks.size(); i++) {
                     us_peaks[i] = us_peaks_data[i];
                 }
@@ -387,8 +472,98 @@
 }
 
 std::vector<std::vector<float>> ParaformerTorch::CompileHotwordEmbedding(std::string &hotwords) {
-    // TODO
-    std::vector<std::vector<float>> result(1, std::vector<float>(10, 0.0f));
+    int embedding_dim = encoder_size;
+    std::vector<std::vector<float>> hw_emb;
+    if (!use_hotword) {
+        std::vector<float> vec(embedding_dim, 0);
+        hw_emb.push_back(vec);
+        return hw_emb;
+    }
+    int max_hotword_len = 10;
+    std::vector<int32_t> hotword_matrix;
+    std::vector<int32_t> lengths;
+    int hotword_size = 1;
+    int real_hw_size = 0;
+    if (!hotwords.empty()) {
+      std::vector<std::string> hotword_array = split(hotwords, ' ');
+      hotword_size = hotword_array.size() + 1;
+      hotword_matrix.reserve(hotword_size * max_hotword_len);
+      for (auto hotword : hotword_array) {
+        std::vector<std::string> chars;
+        if (EncodeConverter::IsAllChineseCharactor((const U8CHAR_T*)hotword.c_str(), hotword.size())) {
+          KeepChineseCharacterAndSplit(hotword, chars);
+        } else {
+          // for english
+          std::vector<std::string> words = split(hotword, ' ');
+          for (auto word : words) {
+            std::vector<string> tokens = seg_dict->GetTokensByWord(word);
+            chars.insert(chars.end(), tokens.begin(), tokens.end());
+          }
+        }
+        if(chars.size()==0){
+            continue;
+        }
+        std::vector<int32_t> hw_vector(max_hotword_len, 0);
+        int vector_len = std::min(max_hotword_len, (int)chars.size());
+        int chs_oov = false;
+        for (int i=0; i<vector_len; i++) {
+          hw_vector[i] = phone_set_->String2Id(chars[i]);
+          if(hw_vector[i] == -1){
+            chs_oov = true;
+            break;
+          }
+        }
+        if(chs_oov){
+          LOG(INFO) << "OOV: " << hotword;
+          continue;
+        }
+        LOG(INFO) << hotword;
+        lengths.push_back(vector_len);
+        real_hw_size += 1;
+        hotword_matrix.insert(hotword_matrix.end(), hw_vector.begin(), hw_vector.end());
+      }
+      hotword_size = real_hw_size + 1;
+    }
+    std::vector<int32_t> blank_vec(max_hotword_len, 0);
+    blank_vec[0] = 1;
+    hotword_matrix.insert(hotword_matrix.end(), blank_vec.begin(), blank_vec.end());
+    lengths.push_back(1);
+
+    torch::Tensor feats =
+        torch::from_blob(hotword_matrix.data(),
+                {hotword_size, max_hotword_len}, torch::kInt32).contiguous();
+
+    // 2. forward
+    #ifdef USE_GPU
+    feats = feats.to(at::kCUDA);
+    #endif
+    std::vector<torch::jit::IValue> inputs = {feats};
+    std::vector<std::vector<float>> result;
+    try {
+        auto output = hw_model_->forward(inputs);
+        torch::Tensor emb_tensor;
+        #ifdef USE_GPU
+        emb_tensor = output.toTensor().to(at::kCPU);
+        #else
+        emb_tensor = output.toTensor();
+        #endif
+        assert(emb_tensor.size(0) == max_hotword_len);
+        assert(emb_tensor.size(1) == hotword_size);
+        embedding_dim = emb_tensor.size(2);
+
+        float* floatData = emb_tensor.data_ptr<float>();
+        for (int j = 0; j < hotword_size; j++)
+        {
+            int start_pos = hotword_size * (lengths[j] - 1) * embedding_dim + j * embedding_dim;
+            std::vector<float> embedding;
+            embedding.insert(embedding.begin(), floatData + start_pos, floatData + start_pos + embedding_dim);
+            result.push_back(embedding);
+        }
+    }
+    catch (std::exception const &e)
+    {
+        LOG(ERROR)<<e.what();
+    }
     return result;
 }
 

--
Gitblit v1.9.1