| | |
| | | // punc |
| | | #define PUNC_MODEL_FILE "punc_model.onnx" |
| | | #define PUNC_YAML_FILE "punc.yaml" |
| | | |
| | | #define UNK_CHAR "<unk>" |
| | | |
| | | #define INPUT_NUM 2 |
| | |
| | | |
| | | // If no callback is needed, pass NULL for fn_callback. |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false); |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false); |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false); |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false); |
| | | |
| | | _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index); |
| | | |
| | | _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result); |
| | | |
| | | _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result); |
| | | |
| | | _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle); |
| | | |
| | | _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result); |
| | | |
| | | #ifdef __cplusplus |
| | |
| | | #ifndef ALIGNEDMEM_H |
| | | #define ALIGNEDMEM_H |
| | | |
| | | |
| | | |
| | | extern void *AlignedMalloc(size_t alignment, size_t required_bytes); |
| | | extern void AlignedFree(void *p); |
| | | |
| | |
| | | { |
| | | auto t = session->GetInputNameAllocated(nIndex, allocator); |
| | | inputName = t.get(); |
| | | |
| | | } |
| | | } |
| | | } |
| | |
| | | { |
| | | auto t = session->GetOutputNameAllocated(nIndex, allocator); |
| | | outputName = t.get(); |
| | | |
| | | } |
| | | } |
| | | } |
| | |
| | | }else{ |
| | | cout <<"No return data!"; |
| | | } |
| | | |
| | | } |
| | | { |
| | | lock_guard<mutex> guard(mtx); |
| | |
| | | // Factory: constructs a paraformer::Paraformer from the given arguments and returns it through the Model base pointer. |
| | | // The object is allocated with `new`; nothing in this function releases it. |
| | | Model *CreateModel(const char *path, int thread_num, bool quantize, bool use_vad, bool use_punc) |
| | | { |
| | | return new paraformer::Paraformer(path, thread_num, quantize, use_vad, use_punc); |
| | | } |
| | |
| | | |
| | | void ExtractFeats(vector<vector<float>> &vad_feats, vector<float> waves, bool input_finished); |
| | | |
| | | |
| | | private: |
| | | void OnlineFbank(vector<vector<float>> &vad_feats, vector<float> &waves); |
| | | |
| | | int OnlineLfrCmvn(vector<vector<float>> &vad_feats); |
| | | |
| | | static int ComputeFrameNum(int sample_length, int frame_sample_length, int frame_shift_sample_length) { |
| | | int frame_num = static_cast<int>((sample_length - frame_sample_length) / frame_shift_sample_length + 1); |
| | | |
| | | if (frame_num >= 1 && sample_length >= frame_sample_length) |
| | | return frame_num; |
| | | else |
| | |
| | | } |
| | | } |
| | | |
| | | string Paraformer::GreedySearch(float * in, int n_len ) |
| | | string Paraformer::GreedySearch(float * in, int n_len, int64_t token_nums) |
| | | { |
| | | vector<int> hyps; |
| | | int Tmax = n_len; |
| | | for (int i = 0; i < Tmax; i++) { |
| | | int max_idx; |
| | | float max_val; |
| | | FindMax(in + i * 8404, 8404, max_val, max_idx); |
| | | FindMax(in + i * token_nums, token_nums, max_val, max_idx); |
| | | hyps.push_back(max_idx); |
| | | } |
| | | |
| | |
| | | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>()); |
| | | float* floatData = outputTensor[0].GetTensorMutableData<float>(); |
| | | auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>(); |
| | | result = GreedySearch(floatData, *encoder_out_lens); |
| | | result = GreedySearch(floatData, *encoder_out_lens, outputShape[2]); |
| | | } |
| | | catch (std::exception const &e) |
| | | { |
| | |
| | | namespace paraformer { |
| | | |
| | | class Paraformer : public Model { |
| | | /** |
| | | * Author: Speech Lab of DAMO Academy, Alibaba Group |
| | | * Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition |
| | | * https://arxiv.org/pdf/2206.08317.pdf |
| | | */ |
| | | private: |
| | | //std::unique_ptr<knf::OnlineFbank> fbank_; |
| | | knf::FbankOptions fbank_opts; |
| | |
| | | vector<float> ApplyLfr(const vector<float> &in); |
| | | void ApplyCmvn(vector<float> *v); |
| | | |
| | | string GreedySearch( float* in, int n_len); |
| | | string GreedySearch( float* in, int n_len, int64_t token_nums); |
| | | |
| | | std::shared_ptr<Ort::Session> m_session; |
| | | Ort::Env env_; |