| | |
| | | from funasr.tasks.vad import VADTask |
| | | from funasr.utils.timestamp_tools import time_stamp_lfr6 |
| | | from funasr.bin.punctuation_infer import Text2Punc |
| | | from funasr.torch_utils.forward_adaptor import ForwardAdaptor |
| | | from funasr.datasets.preprocessor import CommonPreprocessor |
| | | from funasr.punctuation.text_preprocessor import split_to_mini_sentence |
| | | |
| | | header_colors = '\033[95m' |
| | | end_colors = '\033[0m' |
| | | |
| | | global_asr_language: str = 'zh-cn' |
| | | global_sample_rate: Union[int, Dict[Any, int]] = { |
| | | 'audio_fs': 16000, |
| | | 'model_fs': 16000 |
| | | } |
| | | |
| | | class Speech2Text: |
| | | """Speech2Text class |
| | |
| | | def _forward(data_path_and_name_and_type, |
| | | raw_inputs: Union[np.ndarray, torch.Tensor] = None, |
| | | output_dir_v2: Optional[str] = None, |
| | | fs: dict = None, |
| | | param_dict: dict = None, |
| | | ): |
| | | # 3. Build data-iterator |
| | |
| | | loader = ASRTask.build_streaming_iterator( |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | | fs=fs, |
| | | batch_size=1, |
| | | key_file=key_file, |
| | | num_workers=num_workers, |
| | |
| | | punc_id_list = "" |
| | | text_postprocessed_punc = "" |
| | | |
| | | item = {'key': key, 'value': text_postprocessed_punc_time_stamp, 'text': text_postprocessed, |
| | | 'time_stamp': time_stamp_postprocessed, 'punc': punc_id_list, 'token': token} |
| | | if outputs_dict: |
| | | item = {'text_punc': text_postprocessed_punc, 'text': text_postprocessed, |
| | | 'punc_id': punc_id_list, 'token': token, 'time_stamp': time_stamp_postprocessed} |
| | | item = {'key': key, 'value': item} |
| | | item = {'key': key, 'value': text_postprocessed_punc, 'text_postprocessed': text_postprocessed, |
| | | 'time_stamp': time_stamp_postprocessed, 'token': token} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | | # asr_utils.print_progress(finish_count / file_count) |