python/FunASR-XL.git

			@@ -41,8 +41,6 @@
			from funasr.utils import asr_utils, wav_utils, postprocess_utils
			import pdb

			header_colors = '\033[95m'
			end_colors = '\033[0m'

			global_asr_language: str = 'zh-cn'
			global_sample_rate: Union[int, Dict[Any, int]] = {

			@@ -47,15 +47,11 @@

			def seg_tokenize(txt, seg_dict):
			out_txt = ""
			pattern = re.compile(r"([\u4E00-\u9FA5A-Za-z0-9])")
			for word in txt:
			if pattern.match(word):
			if word in seg_dict:
			out_txt += seg_dict[word] + " "
			else:
			out_txt += "<unk>" + " "
			if word in seg_dict:
			out_txt += seg_dict[word] + " "
			else:
			continue
			out_txt += "<unk>" + " "
			return out_txt.strip().split()

			def seg_tokenize_wo_pattern(txt, seg_dict):

			@@ -452,7 +452,7 @@
			def test_uniasr_2pass_zhcn_16k_common_vocab8358_offline(self):
			inference_pipeline = pipeline(
			task=Tasks.auto_speech_recognition,
			model='damo/speech_UniASauto_speech_recognitionR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
			model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
			rec_result = inference_pipeline(
			audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
			param_dict={"decoding_model": "offline"})

	funasr/bin/asr_inference_mfcca.py	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/datasets/preprocessor.py	10 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	tests/test_asr_inference_pipeline.py	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史