Add SenseVoice support to offline-stream
| | |
| | | // APIs for Offline-stream Infer |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, |
| | | FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb, |
| | | int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle) |
| | | int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle, |
| | | std::string svs_lang, bool svs_itn) |
| | | { |
| | | funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; |
| | | if (!offline_stream) |
| | |
| | | if (wfst_decoder){ |
| | | wfst_decoder->StartUtterance(); |
| | | } |
| | | vector<string> msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in); |
| | | vector<string> msg_batch; |
| | | if(offline_stream->GetModelType() == MODEL_SVS){ |
| | | msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, batch_in); |
| | | }else{ |
| | | msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in); |
| | | } |
| | | for(int idx=0; idx<batch_in; idx++){ |
| | | string msg = msg_batch[idx]; |
| | | if(msg_idx < index_vector.size()){ |
| | |
| | | } |
| | | for(int idx=0; idx<msgs.size(); idx++){ |
| | | string msg = msgs[idx]; |
| | | std::vector<std::string> msg_vec = funasr::split(msg, '|'); |
| | | std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | "); |
| | | if(msg_vec.size()==0){ |
| | | continue; |
| | | } |
| | |
| | | } |
| | | for(int idx=0; idx<msgs.size(); idx++){ |
| | | string msg = msgs[idx]; |
| | | std::vector<std::string> msg_vec = funasr::split(msg, '|'); |
| | | std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | "); |
| | | if(msg_vec.size()==0){ |
| | | continue; |
| | | } |
| | |
| | | len[0] = frame->len; |
| | | vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle); |
| | | string msg = msgs.size()>0?msgs[0]:""; |
| | | std::vector<std::string> msg_vec = funasr::split(msg, '|'); // split with timestamp |
| | | std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | "); // split with timestamp |
| | | if(msg_vec.size()==0){ |
| | | continue; |
| | | } |
| | |
| | | use_gpu = false; |
| | | #endif |
| | | }else{ |
| | | asr_handle = make_unique<Paraformer>(); |
| | | if (model_path.at(MODEL_DIR).find(MODEL_SVS) != std::string::npos) |
| | | { |
| | | asr_handle = make_unique<SenseVoiceSmall>(); |
| | | model_type = MODEL_SVS; |
| | | }else{ |
| | | asr_handle = make_unique<Paraformer>(); |
| | | } |
| | | } |
| | | |
| | | bool enable_hotword = false; |
| | |
| | | } |
| | | } |
| | | #endif |
| | | if(model_type == MODEL_SVS){ |
| | | use_itn = false; |
| | | use_punc = false; |
| | | } |
| | | } |
| | | |
| | | OfflineStream *CreateOfflineStream(std::map<std::string, std::string>& model_path, int thread_num, bool use_gpu, int batch_size) |
| | |
| | | #include "seg_dict.h" |
| | | #include "resample.h" |
| | | #include "paraformer.h" |
| | | #include "sensevoice-small.h" |
| | | #ifdef USE_GPU |
| | | #include "paraformer-torch.h" |
| | | #endif |
| | |
| | | return elems; |
| | | } |
| | | |
// Splits `s` on every occurrence of `delimiter`, which may be longer than one
// character (e.g. " | ").
//
// @param s          Text to split.
// @param delimiter  Separator to search for; matched literally, left to right.
// @return The pieces of `s` in order. Adjacent, leading, or trailing delimiters
//         produce empty tokens, so the result always holds at least one
//         element (a single empty string when `s` is empty).
//
// An empty `delimiter` matches at every position without advancing, which
// previously made the loop spin forever; it is now treated as "no separator"
// and `s` is returned as the only token.
std::vector<std::string> SplitStr(const std::string &s, std::string delimiter) {
    std::vector<std::string> tokens;

    // Guard: find("") always succeeds at `start`, so the scan would never end.
    if (delimiter.empty()) {
        tokens.push_back(s);
        return tokens;
    }

    std::string::size_type start = 0;
    std::string::size_type end = s.find(delimiter);

    while (end != std::string::npos) {
        tokens.push_back(s.substr(start, end - start));
        start = end + delimiter.length();
        end = s.find(delimiter, start);
    }
    // Remainder after the last delimiter (the whole string if none was found).
    tokens.push_back(s.substr(start));

    return tokens;
}
| | | |
| | | template<typename T> |
| | | void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name) { |
| | | std::cout << name << ":" << std::endl; |
| | |
| | | std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time); |
| | | std::string TimestampSentence(std::string &text, std::string &str_time); |
| | | std::vector<std::string> split(const std::string &s, char delim); |
| | | std::vector<std::string> SplitStr(const std::string &s, string delimiter); |
| | | |
| | | template<typename T> |
| | | void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name); |