python/FunASR-XL.git

			@@ -38,7 +38,7 @@
			batch_size_token=5000, batch_size_token_threshold_s=40, max_single_segment_time=6000)
			print(rec_result)
			```
			Where,
			其中：
			- `batch_size_token`: 表示采用动态batch，batch中总token数为 `batch_size_token`，1 token = 60 ms.
			- `batch_size_token_threshold_s`: 表示音频时长超过 `batch_size_token_threshold_s` 阈值时，batch数设置为1, 单位为s.
			- `max_single_segment_time`: 表示VAD最大切割音频时长, 单位是ms.

			@@ -1922,9 +1922,6 @@
			):

			from funasr.tasks.whisper import ASRTask
			from funasr.utils.whisper_utils.transcribe import transcribe
			from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
			from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode

			# 1. Build ASR model
			scorers = {}
			@@ -1987,6 +1984,10 @@

			"""

			from funasr.utils.whisper_utils.transcribe import transcribe
			from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
			from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode

			speech = speech[0]
			speech = pad_or_trim(speech)
			mel = log_mel_spectrogram(speech).to(self.device)

	egs_modelscope/asr/TEMPLATE/README_zh.md	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/bin/asr_infer.py	7 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史