| | |
| | | export_mode = param_dict.get("export_mode", False) |
| | | else: |
| | | hotword_list_or_file = None |
| | | clas_scale = param_dict.get('clas_scale', 1.0) |
| | | |
| | | if kwargs.get("device", None) == "cpu": |
| | | ngpu = 0 |
| | |
| | | penalty=penalty, |
| | | nbest=nbest, |
| | | hotword_list_or_file=hotword_list_or_file, |
| | | clas_scale=clas_scale, |
| | | ) |
| | | |
| | | speech2text = Speech2TextParaformer(**speech2text_kwargs) |
| | |
| | | sorted_data = sorted(data_with_index, key=lambda x: x[0][1] - x[0][0]) |
| | | results_sorted = [] |
| | | |
| | | if not len(sorted_data): |
| | | key = keys[0] |
| | | # no active segments after VAD |
| | | if writer is not None: |
| | | # Write empty results |
| | | ibest_writer["token"][key] = "" |
| | | ibest_writer["token_int"][key] = "" |
| | | ibest_writer["vad"][key] = "" |
| | | ibest_writer["text"][key] = "" |
| | | ibest_writer["text_with_punc"][key] = "" |
| | | if use_timestamp: |
| | | ibest_writer["time_stamp"][key] = "" |
| | | |
| | | logging.info("decoding, utt: {}, empty speech".format(key)) |
| | | continue |
| | | |
| | | batch_size_token_ms = batch_size_token*60 |
| | | if speech2text.device == "cpu": |
| | | batch_size_token_ms = 0 |
| | |
| | | left_context=left_context, |
| | | right_context=right_context, |
| | | ) |
| | | speech2text = Speech2TextTransducer.from_pretrained( |
| | | model_tag=model_tag, |
| | | **speech2text_kwargs, |
| | | ) |
| | | speech2text = Speech2TextTransducer(**speech2text_kwargs) |
| | | |
| | | def _forward(data_path_and_name_and_type, |
| | | raw_inputs: Union[np.ndarray, torch.Tensor] = None, |