雾聪
2024-09-25 d62d237a76e423fd1eec31e662162c135d2f93f5
add sensevoice in offline-stream
5个文件已修改
45 ■■■■ 已修改文件
runtime/onnxruntime/src/funasrruntime.cpp 16 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/onnxruntime/src/offline-stream.cpp 12 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/onnxruntime/src/precomp.h 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/onnxruntime/src/util.cpp 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/onnxruntime/src/util.h 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/onnxruntime/src/funasrruntime.cpp
@@ -207,7 +207,8 @@
    // APIs for Offline-stream Infer
    _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, 
                                                   FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb, 
                                                   int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle)
                                                   int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle,
                                                   std::string svs_lang, bool svs_itn)
    {
        funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
        if (!offline_stream)
@@ -256,7 +257,12 @@
            if (wfst_decoder){
                wfst_decoder->StartUtterance();
            }
            vector<string> msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
            vector<string> msg_batch;
            if(offline_stream->GetModelType() == MODEL_SVS){
                msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, batch_in);
            }else{
                msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
            }
            for(int idx=0; idx<batch_in; idx++){
                string msg = msg_batch[idx];
                if(msg_idx < index_vector.size()){
@@ -280,7 +286,7 @@
        }
        for(int idx=0; idx<msgs.size(); idx++){
            string msg = msgs[idx];
            std::vector<std::string> msg_vec = funasr::split(msg, '|');
            std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
            if(msg_vec.size()==0){
                continue;
            }
@@ -402,7 +408,7 @@
        }
        for(int idx=0; idx<msgs.size(); idx++){
            string msg = msgs[idx];
            std::vector<std::string> msg_vec = funasr::split(msg, '|');
            std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
            if(msg_vec.size()==0){
                continue;
            }
@@ -563,7 +569,7 @@
            len[0] = frame->len;
            vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle);
            string msg = msgs.size()>0?msgs[0]:"";
            std::vector<std::string> msg_vec = funasr::split(msg, '|');  // split with timestamp
            std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");  // split with timestamp
            if(msg_vec.size()==0){
                continue;
            }
runtime/onnxruntime/src/offline-stream.cpp
@@ -47,7 +47,13 @@
            use_gpu = false;
            #endif
        }else{
            asr_handle = make_unique<Paraformer>();
            if (model_path.at(MODEL_DIR).find(MODEL_SVS) != std::string::npos)
            {
                asr_handle = make_unique<SenseVoiceSmall>();
                model_type = MODEL_SVS;
            }else{
                asr_handle = make_unique<Paraformer>();
            }
        }
        bool enable_hotword = false;
@@ -138,6 +144,10 @@
        }
    }
#endif
    if(model_type == MODEL_SVS){
        use_itn = false;
        use_punc = false;
    }
}
OfflineStream *CreateOfflineStream(std::map<std::string, std::string>& model_path, int thread_num, bool use_gpu, int batch_size)
runtime/onnxruntime/src/precomp.h
@@ -64,6 +64,7 @@
#include "seg_dict.h"
#include "resample.h"
#include "paraformer.h"
#include "sensevoice-small.h"
#ifdef USE_GPU
#include "paraformer-torch.h"
#endif
runtime/onnxruntime/src/util.cpp
@@ -646,6 +646,21 @@
  return elems;
}
// Split `s` on every occurrence of the (possibly multi-character) `delimiter`.
// Returns the pieces in order: adjacent delimiters produce empty tokens, and
// a string containing no delimiter yields a single-element vector {s}.
// An empty delimiter is treated as "no split" (returns {s}) — without this
// guard, std::string::find("") matches at every position and the scan loop
// below would never terminate.
std::vector<std::string> SplitStr(const std::string &s, std::string delimiter) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        // Guard against infinite loop: find("") always succeeds at `start`,
        // so `start` would never advance past it.
        tokens.push_back(s);
        return tokens;
    }
    size_t start = 0;
    size_t end = s.find(delimiter);
    while (end != std::string::npos) {
        tokens.push_back(s.substr(start, end - start));
        start = end + delimiter.length();
        end = s.find(delimiter, start);
    }
    // Trailing piece after the last delimiter (or the whole string if none).
    tokens.push_back(s.substr(start));
    return tokens;
}
template<typename T>
void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name) {
  std::cout << name << ":" << std::endl;
runtime/onnxruntime/src/util.h
@@ -49,6 +49,7 @@
std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time);
std::string TimestampSentence(std::string &text, std::string &str_time);
std::vector<std::string> split(const std::string &s, char delim);
std::vector<std::string> SplitStr(const std::string &s, string delimiter);
template<typename T>
void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name);