Update streaming Paraformer text processing
| | |
| | | rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + stride_size], |
| | | param_dict=param_dict) |
| | | if len(rec_result) != 0: |
| | | final_result += " ".join(rec_result['text']) + " " |
| | | final_result += rec_result['text'] + " " |
| | | print(rec_result) |
| | | print(final_result) |
| | |
| | | rec_result = inference_pipeline(audio_in=speech[sample_offset: sample_offset + stride_size], |
| | | param_dict=param_dict) |
| | | if len(rec_result) != 0: |
| | | final_result += " ".join(rec_result['text']) + " " |
| | | final_result += rec_result['text'] + " " |
| | | print(rec_result) |
| | | print(final_result.strip()) |
| | |
| | | asr_result = speech2text(cache, raw_inputs[:, sample_offset: sample_offset + stride_size], input_lens) |
| | | if len(asr_result) != 0: |
| | | final_result += " ".join(asr_result) + " " |
| | | item = {'key': "utt", 'value': [final_result.strip()]} |
| | | item = {'key': "utt", 'value': final_result.strip()} |
| | | else: |
| | | input_lens = torch.tensor([raw_inputs.shape[1]]) |
| | | cache["encoder"]["is_final"] = is_final |
| | | asr_result = speech2text(cache, raw_inputs, input_lens) |
| | | item = {'key': "utt", 'value': asr_result} |
| | | item = {'key': "utt", 'value': " ".join(asr_result)} |
| | | |
| | | asr_result_list.append(item) |
| | | if is_final: |