From 1d1ef01b4e23630a99a3be7e9d1dce9550a793e9 Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: Thu, 11 May 2023 16:26:24 +0800
Subject: [PATCH] Merge branch 'main' into dev_smohan
---
funasr/bin/asr_inference_paraformer_streaming.py | 24 ++++++------------------
1 files changed, 6 insertions(+), 18 deletions(-)
diff --git a/funasr/bin/asr_inference_paraformer_streaming.py b/funasr/bin/asr_inference_paraformer_streaming.py
index bf5590c..4f04d02 100644
--- a/funasr/bin/asr_inference_paraformer_streaming.py
+++ b/funasr/bin/asr_inference_paraformer_streaming.py
@@ -239,7 +239,7 @@
feats_len = torch.tensor([feats_chunk2.shape[1]])
results_chunk2 = self.infer(feats_chunk2, feats_len, cache)
- return ["".join(results_chunk1 + results_chunk2)]
+ return [" ".join(results_chunk1 + results_chunk2)]
results = self.infer(feats, feats_len, cache)
@@ -299,12 +299,9 @@
# Change integer-ids to tokens
token = self.converter.ids2tokens(token_int)
+ token = " ".join(token)
- if self.tokenizer is not None:
- text = self.tokenizer.tokens2text(token)
- else:
- text = None
- results.append(text)
+ results.append(token)
# assert check_return_type(results)
return results
@@ -555,13 +552,13 @@
input_lens = torch.tensor([stride_size])
asr_result = speech2text(cache, raw_inputs[:, sample_offset: sample_offset + stride_size], input_lens)
if len(asr_result) != 0:
- final_result += asr_result[0]
- item = {'key': "utt", 'value': [final_result]}
+ final_result += " ".join(asr_result) + " "
+ item = {'key': "utt", 'value': final_result.strip()}
else:
input_lens = torch.tensor([raw_inputs.shape[1]])
cache["encoder"]["is_final"] = is_final
asr_result = speech2text(cache, raw_inputs, input_lens)
- item = {'key': "utt", 'value': asr_result}
+ item = {'key': "utt", 'value': " ".join(asr_result)}
asr_result_list.append(item)
if is_final:
@@ -750,12 +747,3 @@
if __name__ == "__main__":
main()
- # from modelscope.pipelines import pipeline
- # from modelscope.utils.constant import Tasks
- #
- # inference_16k_pipline = pipeline(
- # task=Tasks.auto_speech_recognition,
- # model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')
- #
- # rec_result = inference_16k_pipline(audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
- # print(rec_result)
--
Gitblit v1.9.1