| | |
| | | } |
| | | |
| | | // offline |
| | | void ParaformerTorch::InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){ |
| | | void ParaformerTorch::InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, const std::string &token_file, int thread_num){ |
| | | LoadConfigFromYaml(am_config.c_str()); |
| | | // knf options |
| | | fbank_opts_.frame_opts.dither = 0; |
| | |
| | | fbank_opts_.energy_floor = 0; |
| | | fbank_opts_.mel_opts.debug_mel = false; |
| | | |
| | | vocab = new Vocab(am_config.c_str()); |
| | | phone_set_ = new PhoneSet(am_config.c_str()); |
| | | vocab = new Vocab(token_file.c_str()); |
| | | phone_set_ = new PhoneSet(token_file.c_str()); |
| | | LoadCmvn(am_cmvn.c_str()); |
| | | |
| | | torch::DeviceType device = at::kCPU; |
| | |
| | | if(asr_feats.size() != 0){ |
| | | LfrCmvn(asr_feats); |
| | | } |
| | | int32_t num_frames = asr_feats.size() / feature_dim; |
| | | int32_t num_frames = asr_feats.size(); |
| | | paraformer_length.emplace_back(num_frames); |
| | | if(max_size < asr_feats.size()){ |
| | | max_size = asr_feats.size(); |
| | | if(max_size < asr_feats.size()*feature_dim){ |
| | | max_size = asr_feats.size()*feature_dim; |
| | | max_frames = num_frames; |
| | | } |
| | | |
| | |
| | | } |
| | | } |
| | | results.push_back(result); |
| | | if (wfst_decoder){ |
| | | wfst_decoder->StartUtterance(); |
| | | } |
| | | } |
| | | } |
| | | catch (std::exception const &e) |