Merge pull request #93 from alibaba-damo-academy/dev_lzr
update paraformer-large model RESULTS.md and support for turning off timestamps
| New file |
| | |
| | | # Paraformer-Large |
| | | - Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch/summary> |
| | | - Model size: 220M |
| | | |
| | | # Environments |
| | | - date: `Fri Feb 10 13:34:24 CST 2023` |
| | | - python version: `3.7.12` |
| | | - FunASR version: `0.1.6` |
| | | - pytorch version: `pytorch 1.7.0` |
| | | - Git hash: `` |
| | | - Commit date: `` |
| | | |
| | | # Benchmark Results |
| | | |
| | | ## AISHELL-1 |
| | | - Decode config: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | |
| | | | testset CER(%) | base model|finetune model | |
| | | |:--------------:|:---------:|:-------------:| |
| | | | dev | 1.75 |1.62 | |
| | | | test | 1.95 |1.78 | |
| New file |
| | |
| | | # Paraformer-Large |
| | | - Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch/summary> |
| | | - Model size: 220M |
| | | |
| | | # Environments |
| | | - date: `Fri Feb 10 13:34:24 CST 2023` |
| | | - python version: `3.7.12` |
| | | - FunASR version: `0.1.6` |
| | | - pytorch version: `pytorch 1.7.0` |
| | | - Git hash: `` |
| | | - Commit date: `` |
| | | |
| | | # Benchmark Results |
| | | |
| | | ## AISHELL-2 |
| | | - Decode config: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | |
| | | | testset CER(%) | base model|finetune model| |
| | | |:------------:|:---------:|:------------:| |
| | | | dev_ios | 2.80 |2.60 | |
| | | | test_android | 3.13 |2.84 | |
| | | | test_ios | 2.85 |2.82 | |
| | | | test_mic | 3.06 |2.88 | |
| New file |
| | |
| | | # Paraformer-Large |
| | | - Model link: <https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary> |
| | | - Model size: 220M |
| | | |
| | | # Environments |
| | | - date: `Tue Nov 22 18:48:39 CST 2022` |
| | | - python version: `3.7.12` |
| | | - FunASR version: `0.1.0` |
| | | - pytorch version: `pytorch 1.7.0` |
| | | - Git hash: `` |
| | | - Commit date: `` |
| | | |
| | | # Benchmark Results |
| | | |
| | | ## AISHELL-1 |
| | | - Decode config: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | |
| | | | testset | CER(%)| |
| | | |:---------:|:-----:| |
| | | | dev | 1.75 | |
| | | | test | 1.95 | |
| | | |
| | | ## AISHELL-2 |
| | | - Decode config: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | |
| | | | testset | CER(%)| |
| | | |:------------:|:-----:| |
| | | | dev_ios | 2.80 | |
| | | | test_android | 3.13 | |
| | | | test_ios | 2.85 | |
| | | | test_mic | 3.06 | |
| | | |
| | | ## Wenetspeech |
| | | - Decode config: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | |
| | | | testset | CER(%)| |
| | | |:---------:|:-----:| |
| | | | dev | 3.57 | |
| | | | test | 6.97 | |
| | | | test_net | 6.74 | |
| | | |
| | | ## SpeechIO TIOBE |
| | | - Decode config 1: |
| | | - Decode without CTC |
| | | - Decode without LM |
| | | - With text norm |
| | | - Decode config 2: |
| | | - Decode without CTC |
| | | - Decode with Transformer-LM |
| | | - LM weight: 0.15 |
| | | - With text norm |
| | | |
| | | | testset | w/o LM | w/ LM | |
| | | |:------------------:|:----:|:----:| |
| | | |SPEECHIO_ASR_ZH00001| 0.49 | 0.35 | |
| | | |SPEECHIO_ASR_ZH00002| 3.23 | 2.86 | |
| | | |SPEECHIO_ASR_ZH00003| 1.13 | 0.80 | |
| | | |SPEECHIO_ASR_ZH00004| 1.33 | 1.10 | |
| | | |SPEECHIO_ASR_ZH00005| 1.41 | 1.18 | |
| | | |SPEECHIO_ASR_ZH00006| 5.25 | 4.85 | |
| | | |SPEECHIO_ASR_ZH00007| 5.51 | 4.97 | |
| | | |SPEECHIO_ASR_ZH00008| 3.69 | 3.18 | |
| | | |SPEECHIO_ASR_ZH00009| 3.02 | 2.78 | |
| | | |SPEECHIO_ASR_ZH00010| 3.35 | 2.99 |
| | | |SPEECHIO_ASR_ZH00011| 1.54 | 1.25 |
| | | |SPEECHIO_ASR_ZH00012| 2.06 | 1.68 |
| | | |SPEECHIO_ASR_ZH00013| 2.57 | 2.25 |
| | | |SPEECHIO_ASR_ZH00014| 3.86 | 3.08 |
| | | |SPEECHIO_ASR_ZH00015| 3.34 | 2.67 |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | |
| | | format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s", |
| | | ) |
| | | |
| | | hotword_list_or_file = param_dict['hotword'] |
| | | if param_dict is not None: |
| | | hotword_list_or_file = param_dict.get('hotword') |
| | | else: |
| | | hotword_list_or_file = None |
| | | |
| | | if ngpu >= 1 and torch.cuda.is_available(): |
| | | device = "cuda" |
| | | else: |
| | |
| | | ibest_writer["rtf"][key] = rtf_cur |
| | | |
| | | if text is not None: |
| | | text_postprocessed = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | |
| | | inference=True, |
| | | ) |
| | | |
| | | if param_dict is not None: |
| | | use_timestamp = param_dict.get('use_timestamp', True) |
| | | else: |
| | | use_timestamp = True |
| | | |
| | | finish_count = 0 |
| | | file_count = 1 |
| | | lfr_factor = 6 |
| | |
| | | text, token, token_int = result[0], result[1], result[2] |
| | | time_stamp = None if len(result) < 4 else result[3] |
| | | |
| | | |
| | | if use_timestamp and time_stamp is not None: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp) |
| | | else: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed = "" |
| | | time_stamp_postprocessed = "" |
| | | text_postprocessed_punc = postprocessed_result |
| | |
| | | text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \ |
| | | postprocessed_result[1], \ |
| | | postprocessed_result[2] |
| | | else: |
| | | text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1] |
| | | text_postprocessed_punc = text_postprocessed |
| | | if len(word_lists) > 0 and text2punc is not None: |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | |
| | | inference=True, |
| | | ) |
| | | |
| | | if param_dict is not None: |
| | | use_timestamp = param_dict.get('use_timestamp', True) |
| | | else: |
| | | use_timestamp = True |
| | | |
| | | finish_count = 0 |
| | | file_count = 1 |
| | | lfr_factor = 6 |
| | |
| | | text, token, token_int = result[0], result[1], result[2] |
| | | time_stamp = None if len(result) < 4 else result[3] |
| | | |
| | | if use_timestamp and time_stamp is not None: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp) |
| | | else: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed = "" |
| | | time_stamp_postprocessed = "" |
| | | text_postprocessed_punc = postprocessed_result |
| | |
| | | text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \ |
| | | postprocessed_result[1], \ |
| | | postprocessed_result[2] |
| | | else: |
| | | text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1] |
| | | |
| | | text_postprocessed_punc = text_postprocessed |
| | | if len(word_lists) > 0 and text2punc is not None: |
| | | text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20) |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | |
| | | return sentence, ts_lists, real_word_lists |
| | | else: |
| | | word_lists = abbr_dispose(word_lists) |
| | | real_word_lists = [] |
| | | for ch in word_lists: |
| | | if ch != ' ': |
| | | real_word_lists.append(ch) |
| | | sentence = ''.join(word_lists).strip() |
| | | return sentence |
| | | return sentence, real_word_lists |