| | |
| | | set_all_random_seed(kwargs.get("seed", 0)) |
| | | |
| | | device = kwargs.get("device", "cuda") |
| | | if not torch.cuda.is_available() or kwargs.get("ngpu", 0): |
| | | if not torch.cuda.is_available() or kwargs.get("ngpu", 0) == 0: |
| | | device = "cpu" |
| | | kwargs["batch_size"] = 1 |
| | | kwargs["device"] = device |
| | |
| | | asr_result_list = [] |
| | | num_samples = len(data_list) |
| | | disable_pbar = kwargs.get("disable_pbar", False) |
| | | pbar = tqdm(colour="blue", total=num_samples+1, dynamic_ncols=True) if not disable_pbar else None |
| | | pbar = tqdm(colour="blue", total=num_samples, dynamic_ncols=True) if not disable_pbar else None |
| | | time_speech_total = 0.0 |
| | | time_escape_total = 0.0 |
| | | for beg_idx in range(0, num_samples, batch_size): |
| | |
| | | |
| | | end_asr_total = time.time() |
| | | time_escape_total_per_sample = end_asr_total - beg_asr_total |
| | | pbar_sample.update(1) |
| | | pbar_sample.set_description(f"rtf_avg_per_sample: {time_escape_total_per_sample / time_speech_total_per_sample:0.3f}, " |
| | | f"time_speech_total_per_sample: {time_speech_total_per_sample: 0.3f}, " |
| | | f"time_escape_total_per_sample: {time_escape_total_per_sample:0.3f}") |
| | |
| | | result[k] = restored_data[j][k] |
| | | else: |
| | | result[k] = torch.cat([result[k], restored_data[j][k]], dim=0) |
| | | elif k == 'text': |
| | | elif k == 'raw_text': |
| | | if k not in result: |
| | | result[k] = restored_data[j][k] |
| | | else: |
| | |
| | | else: # punc_segment |
| | | sentence_list = timestamp_sentence(punc_res[0]['punc_array'], \ |
| | | result['timestamp'], \ |
| | | result['text']) |
| | | result['raw_text']) |
| | | distribute_spk(sentence_list, sv_output) |
| | | result['sentence_info'] = sentence_list |
| | | |