From 2edb3a1bf04fbd44c132756c0e6610ca62eccc97 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期五, 10 二月 2023 13:54:31 +0800
Subject: [PATCH] Merge pull request #93 from alibaba-damo-academy/dev_lzr
---
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md | 23 +++++
funasr/bin/asr_inference_paraformer.py | 8 +
funasr/bin/asr_inference_uniasr.py | 2
funasr/bin/asr_inference_uniasr_vad.py | 2
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md | 75 ++++++++++++++++++
funasr/bin/asr_inference.py | 2
funasr/bin/asr_inference_paraformer_vad.py | 19 +++-
funasr/bin/asr_inference_paraformer_timestamp.py | 2
funasr/bin/asr_inference_paraformer_vad_punc.py | 21 ++++-
funasr/utils/postprocess_utils.py | 6 +
egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md | 25 ++++++
11 files changed, 168 insertions(+), 17 deletions(-)
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md
new file mode 100644
index 0000000..5eeae37
--- /dev/null
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/RESULTS.md
@@ -0,0 +1,23 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Fri Feb 10 13:34:24 CST 2023`
+- python version: `3.7.12`
+- FunASR version: `0.1.6`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-1
+- Decode config:
+ - Decode without CTC
+ - Decode without LM
+
+| testset CER(%) | base model|finetune model |
+|:--------------:|:---------:|:-------------:|
+| dev | 1.75 |1.62 |
+| test | 1.95 |1.78 |
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md
new file mode 100644
index 0000000..71d9fee
--- /dev/null
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/RESULTS.md
@@ -0,0 +1,25 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Fri Feb 10 13:34:24 CST 2023`
+- python version: `3.7.12`
+- FunASR version: `0.1.6`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-2
+- Decode config:
+ - Decode without CTC
+ - Decode without LM
+
+| testset | base model|finetune model|
+|:------------:|:---------:|:------------:|
+| dev_ios | 2.80 |2.60 |
+| test_android | 3.13 |2.84 |
+| test_ios | 2.85 |2.82 |
+| test_mic | 3.06 |2.88 |
diff --git a/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md
new file mode 100644
index 0000000..ec95be3
--- /dev/null
+++ b/egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/RESULTS.md
@@ -0,0 +1,75 @@
+# Paraformer-Large
+- Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary>
+- Model size: 220M
+
+# Environments
+- date: `Tue Nov 22 18:48:39 CST 2022`
+- python version: `3.7.12`
+- FunASR version: `0.1.0`
+- pytorch version: `pytorch 1.7.0`
+- Git hash: ``
+- Commit date: ``
+
+# Benchmark Results
+
+## AISHELL-1
+- Decode config:
+ - Decode without CTC
+ - Decode without LM
+
+| testset | CER(%)|
+|:---------:|:-----:|
+| dev | 1.75 |
+| test | 1.95 |
+
+## AISHELL-2
+- Decode config:
+ - Decode without CTC
+ - Decode without LM
+
+| testset | CER(%)|
+|:------------:|:-----:|
+| dev_ios | 2.80 |
+| test_android | 3.13 |
+| test_ios | 2.85 |
+| test_mic | 3.06 |
+
+## Wenetspeech
+- Decode config:
+ - Decode without CTC
+ - Decode without LM
+
+| testset | CER(%)|
+|:---------:|:-----:|
+| dev | 3.57 |
+| test | 6.97 |
+| test_net | 6.74 |
+
+## SpeechIO TIOBE
+- Decode config 1:
+ - Decode without CTC
+ - Decode without LM
+ - With text norm
+- Decode config 2:
+ - Decode without CTC
+ - Decode with Transformer-LM
+ - LM weight: 0.15
+ - With text norm
+
+| testset | w/o LM | w/ LM |
+|:------------------:|:----:|:----:|
+|SPEECHIO_ASR_ZH00001| 0.49 | 0.35 |
+|SPEECHIO_ASR_ZH00002| 3.23 | 2.86 |
+|SPEECHIO_ASR_ZH00003| 1.13 | 0.80 |
+|SPEECHIO_ASR_ZH00004| 1.33 | 1.10 |
+|SPEECHIO_ASR_ZH00005| 1.41 | 1.18 |
+|SPEECHIO_ASR_ZH00006| 5.25 | 4.85 |
+|SPEECHIO_ASR_ZH00007| 5.51 | 4.97 |
+|SPEECHIO_ASR_ZH00008| 3.69 | 3.18 |
+|SPEECHIO_ASR_ZH00009| 3.02 | 2.78 |
+|SPEECHIO_ASR_ZH00010| 3.35 | 2.99 |
+|SPEECHIO_ASR_ZH00011| 1.54 | 1.25 |
+|SPEECHIO_ASR_ZH00012| 2.06 | 1.68 |
+|SPEECHIO_ASR_ZH00013| 2.57 | 2.25 |
+|SPEECHIO_ASR_ZH00014| 3.86 | 3.08 |
+|SPEECHIO_ASR_ZH00015| 3.34 | 2.67 |
diff --git a/funasr/bin/asr_inference.py b/funasr/bin/asr_inference.py
index 16fa3e5..ca8f2bc 100644
--- a/funasr/bin/asr_inference.py
+++ b/funasr/bin/asr_inference.py
@@ -453,7 +453,7 @@
ibest_writer["score"][key] = str(hyp.score)
if text is not None:
- text_postprocessed = postprocess_utils.sentence_postprocess(token)
+ text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
item = {'key': key, 'value': text_postprocessed}
asr_result_list.append(item)
finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer.py b/funasr/bin/asr_inference_paraformer.py
index 709c5bf..6c5acfc 100644
--- a/funasr/bin/asr_inference_paraformer.py
+++ b/funasr/bin/asr_inference_paraformer.py
@@ -428,7 +428,11 @@
format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
- hotword_list_or_file = param_dict['hotword']
+ if param_dict is not None:
+ hotword_list_or_file = param_dict.get('hotword')
+ else:
+ hotword_list_or_file = None
+
if ngpu >= 1 and torch.cuda.is_available():
device = "cuda"
else:
@@ -539,7 +543,7 @@
ibest_writer["rtf"][key] = rtf_cur
if text is not None:
- text_postprocessed = postprocess_utils.sentence_postprocess(token)
+ text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
item = {'key': key, 'value': text_postprocessed}
asr_result_list.append(item)
finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer_timestamp.py b/funasr/bin/asr_inference_paraformer_timestamp.py
index 7e2e414..7da48e2 100644
--- a/funasr/bin/asr_inference_paraformer_timestamp.py
+++ b/funasr/bin/asr_inference_paraformer_timestamp.py
@@ -436,7 +436,7 @@
ibest_writer["score"][key] = str(hyp.score)
if text is not None:
- text_postprocessed = postprocess_utils.sentence_postprocess(token)
+ text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
item = {'key': key, 'value': text_postprocessed}
asr_result_list.append(item)
finish_count += 1
diff --git a/funasr/bin/asr_inference_paraformer_vad.py b/funasr/bin/asr_inference_paraformer_vad.py
index 2832504..dbb2719 100644
--- a/funasr/bin/asr_inference_paraformer_vad.py
+++ b/funasr/bin/asr_inference_paraformer_vad.py
@@ -241,6 +241,11 @@
allow_variable_data_keys=allow_variable_data_keys,
inference=True,
)
+
+ if param_dict is not None:
+ use_timestamp = param_dict.get('use_timestamp', True)
+ else:
+ use_timestamp = True
finish_count = 0
file_count = 1
@@ -284,8 +289,10 @@
text, token, token_int = result[0], result[1], result[2]
time_stamp = None if len(result) < 4 else result[3]
-
- postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+ if use_timestamp and time_stamp is not None:
+ postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+ else:
+ postprocessed_result = postprocess_utils.sentence_postprocess(token)
text_postprocessed = ""
time_stamp_postprocessed = ""
text_postprocessed_punc = postprocessed_result
@@ -293,9 +300,11 @@
text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
postprocessed_result[1], \
postprocessed_result[2]
- text_postprocessed_punc = text_postprocessed
- if len(word_lists) > 0 and text2punc is not None:
- text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+ else:
+ text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+ text_postprocessed_punc = text_postprocessed
+ if len(word_lists) > 0 and text2punc is not None:
+ text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
item = {'key': key, 'value': text_postprocessed_punc}
diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py
index 7d18e02..c4bb61b 100644
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -570,6 +570,11 @@
allow_variable_data_keys=allow_variable_data_keys,
inference=True,
)
+
+ if param_dict is not None:
+ use_timestamp = param_dict.get('use_timestamp', True)
+ else:
+ use_timestamp = True
finish_count = 0
file_count = 1
@@ -612,8 +617,11 @@
result = result_segments[0]
text, token, token_int = result[0], result[1], result[2]
time_stamp = None if len(result) < 4 else result[3]
-
- postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+
+ if use_timestamp and time_stamp is not None:
+ postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+ else:
+ postprocessed_result = postprocess_utils.sentence_postprocess(token)
text_postprocessed = ""
time_stamp_postprocessed = ""
text_postprocessed_punc = postprocessed_result
@@ -621,9 +629,12 @@
text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
postprocessed_result[1], \
postprocessed_result[2]
- text_postprocessed_punc = text_postprocessed
- if len(word_lists) > 0 and text2punc is not None:
- text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+ else:
+ text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+
+ text_postprocessed_punc = text_postprocessed
+ if len(word_lists) > 0 and text2punc is not None:
+ text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
item = {'key': key, 'value': text_postprocessed_punc}
if text_postprocessed != "":
diff --git a/funasr/bin/asr_inference_uniasr.py b/funasr/bin/asr_inference_uniasr.py
index cfec9a0..0a5824c 100644
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@@ -492,7 +492,7 @@
ibest_writer["score"][key] = str(hyp.score)
if text is not None:
- text_postprocessed = postprocess_utils.sentence_postprocess(token)
+ text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
item = {'key': key, 'value': text_postprocessed}
asr_result_list.append(item)
finish_count += 1
diff --git a/funasr/bin/asr_inference_uniasr_vad.py b/funasr/bin/asr_inference_uniasr_vad.py
index cfec9a0..0a5824c 100644
--- a/funasr/bin/asr_inference_uniasr_vad.py
+++ b/funasr/bin/asr_inference_uniasr_vad.py
@@ -492,7 +492,7 @@
ibest_writer["score"][key] = str(hyp.score)
if text is not None:
- text_postprocessed = postprocess_utils.sentence_postprocess(token)
+ text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
item = {'key': key, 'value': text_postprocessed}
asr_result_list.append(item)
finish_count += 1
diff --git a/funasr/utils/postprocess_utils.py b/funasr/utils/postprocess_utils.py
index 4da0d59..575fb90 100644
--- a/funasr/utils/postprocess_utils.py
+++ b/funasr/utils/postprocess_utils.py
@@ -232,5 +232,9 @@
return sentence, ts_lists, real_word_lists
else:
word_lists = abbr_dispose(word_lists)
+ real_word_lists = []
+ for ch in word_lists:
+ if ch != ' ':
+ real_word_lists.append(ch)
sentence = ''.join(word_lists).strip()
- return sentence
+ return sentence, real_word_lists
--
Gitblit v1.9.1