python/FunASR-XL.git

			@@ -50,17 +50,17 @@
			(Note: 🤗 represents the Hugging Face model zoo link; ⭐ represents the ModelScope model zoo link)


			\| Model Name \| Task Details \| Training Data \| Parameters \|
			\|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------:\|:---------------------------------------------------------------------------:\|:--------------------------------:\|:----------:\|
			\| <nobr>paraformer-zh ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() )</nobr> \| speech recognition, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-zh-spk ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() )</nobr> \| speech recognition with speaker diarization, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-zh-online ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )</nobr> \| speech recognition, streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-en ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )</nobr> \| speech recognition, with timestamps, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>paraformer-en-spk ([🤗]() [⭐]() )</nobr> \| speech recognition with speaker diarization, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>conformer-en ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() )</nobr> \| speech recognition, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>ct-punc ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() )</nobr> \| punctuation restoration \| 100M, Mandarin and English \| 1.1G \|
			\| <nobr>fsmn-vad ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() )</nobr> \| voice activity detection \| 5000 hours, Mandarin and English \| 0.4M \|
			\| <nobr>fa-zh ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() )</nobr> \| timestamp prediction \| 5000 hours, Mandarin \| 38M \|
			\| Model Name \| Task Details \| Training Data \| Parameters \|
			\|:------------------------------------------------------------------------------------------------------------------------------------------------------------------:\|:---------------------------------------------------------------------------:\|:--------------------------------:\|:----------:\|
			\| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() ) \| speech recognition, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) \| speech recognition with speaker diarization, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-zh-online <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) \| speech recognition, streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) \| speech recognition, with timestamps, non-streaming \| 50000 hours, English \| 220M \|
			\| paraformer-en-spk <br> ( [⭐]() [🤗]() ) \| speech recognition with speaker diarization, non-streaming \| TBD \| TBD \|
			\| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() ) \| speech recognition, non-streaming \| 50000 hours, English \| 220M \|
			\| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() ) \| punctuation restoration \| 100M, Mandarin and English \| 1.1G \|
			\| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() ) \| voice activity detection \| 5000 hours, Mandarin and English \| 0.4M \|
			\| fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() ) \| timestamp prediction \| 5000 hours, Mandarin \| 38M \|

			@@ -50,17 +50,17 @@
			(Note: 🤗 represents the Hugging Face model zoo link; ⭐ represents the ModelScope model zoo link)


			\| Model Name \| Task Details \| Training Data \| Parameters \|
			\|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------:\|:---------------------------------------------------------------------------:\|:--------------------------------:\|:----------:\|
			\| <nobr>paraformer-zh ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() )</nobr> \| speech recognition, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-zh-spk ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() )</nobr> \| speech recognition with speaker diarization, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-zh-online ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )</nobr> \| speech recognition, streaming \| 60000 hours, Mandarin \| 220M \|
			\| <nobr>paraformer-en ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )</nobr> \| speech recognition, with timestamps, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>paraformer-en-spk ([🤗]() [⭐]() )</nobr> \| speech recognition with speaker diarization, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>conformer-en ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() )</nobr> \| speech recognition, non-streaming \| 50000 hours, English \| 220M \|
			\| <nobr>ct-punc ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() )</nobr> \| punctuation restoration \| 100M, Mandarin and English \| 1.1G \|
			\| <nobr>fsmn-vad ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() )</nobr> \| voice activity detection \| 5000 hours, Mandarin and English \| 0.4M \|
			\| <nobr>fa-zh ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() )</nobr> \| timestamp prediction \| 5000 hours, Mandarin \| 38M \|
			\| Model Name \| Task Details \| Training Data \| Parameters \|
			\|:------------------------------------------------------------------------------------------------------------------------------------------------------------------:\|:---------------------------------------------------------------------------:\|:--------------------------------:\|:----------:\|
			\| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() ) \| speech recognition, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) \| speech recognition with speaker diarization, with timestamps, non-streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-zh-online <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) \| speech recognition, streaming \| 60000 hours, Mandarin \| 220M \|
			\| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) \| speech recognition, with timestamps, non-streaming \| 50000 hours, English \| 220M \|
			\| paraformer-en-spk <br> ( [⭐]() [🤗]() ) \| speech recognition with speaker diarization, non-streaming \| TBD \| TBD \|
			\| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() ) \| speech recognition, non-streaming \| 50000 hours, English \| 220M \|
			\| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() ) \| punctuation restoration \| 100M, Mandarin and English \| 1.1G \|
			\| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() ) \| voice activity detection \| 5000 hours, Mandarin and English \| 0.4M \|
			\| fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() ) \| timestamp prediction \| 5000 hours, Mandarin \| 38M \|