python/FunASR-XL.git

			@@ -1,5 +1,6 @@
			import argparse
			import logging
			import os
			import sys
			import json
			from pathlib import Path
			@@ -29,14 +30,7 @@
			from funasr.models.frontend.wav_frontend import WavFrontend
			from funasr.bin.vad_inference import Speech2VadSegment

			# ANSI escape sequences used to colourise terminal output:
			# code 95 selects a bright-magenta foreground on most terminals,
			# and code 0 resets all text attributes back to the default.
			header_colors = '\033[95m'
			end_colors = '\033[0m'

			# Module-level default language tag.
			# NOTE(review): presumably the ASR language consumed elsewhere in this
			# module — no reader is visible in this section; confirm.
			global_asr_language: str = 'zh-cn'
			# Module-level default sample-rate setting. The annotation allows either a
			# single int or a mapping; the default is a mapping with two entries,
			# 'audio_fs' and 'model_fs', both 16000.
			# NOTE(review): presumably 'audio_fs' is the input audio rate and
			# 'model_fs' the rate the model expects, both in Hz — confirm.
			global_sample_rate: Union[int, Dict[Any, int]] = {
			'audio_fs': 16000,
			'model_fs': 16000
			}


			class Speech2VadSegmentOnline(Speech2VadSegment):
			@@ -96,7 +90,7 @@
			}
			# a. To device
			batch = to_device(batch, device=self.device)
			segments, in_cache = self.vad_model(**batch)
			segments, in_cache = self.vad_model.forward_online(**batch)
			# in_cache.update(batch['in_cache'])
			# in_cache = {key: value for key, value in batch['in_cache'].items()}
			return fbanks, segments, in_cache
			@@ -236,12 +230,14 @@
			# param_dict['in_cache'] = batch['in_cache']
			if results:
			for i, _ in enumerate(keys):
			results[i] = json.dumps(results[i])
			item = {'key': keys[i], 'value': results[i]}
			vad_results.append(item)
			if writer is not None:
			results[i] = json.loads(results[i])
			ibest_writer["text"][keys[i]] = "{}".format(results[i])
			if results[i]:
			if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
			results[i] = json.dumps(results[i])
			item = {'key': keys[i], 'value': results[i]}
			vad_results.append(item)
			if writer is not None:
			results[i] = json.loads(results[i])
			ibest_writer["text"][keys[i]] = "{}".format(results[i])

			return vad_results