zhifu gao
2023-02-20 0856ea2ebdcb976db6e786de5cd79fae3d35cd4c
funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py
@@ -9,10 +9,11 @@
import librosa
import numpy as np
from .utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
                    OrtInferSession, TokenIDConverter, WavFrontend, get_logger,
from utils.utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
                    OrtInferSession, TokenIDConverter, get_logger,
                    read_yaml)
from .postprocess_utils import sentence_postprocess
from utils.postprocess_utils import sentence_postprocess
from utils.frontend import WavFrontend
logging = get_logger()
@@ -40,8 +41,8 @@
        self.ort_infer = OrtInferSession(model_file, device_id)
        self.batch_size = batch_size
    def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List:
        waveform_list = self.load_data(wav_content)
    def __call__(self, wav_content: Union[str, np.ndarray, List[str]], **kwargs) -> List:
        waveform_list = self.load_data(wav_content, self.frontend.opts.samp_freq)
        waveform_nums = len(waveform_list)
        asr_res = []
@@ -53,8 +54,9 @@
            try:
                am_scores, valid_token_lens = self.infer(feats, feats_len)
            except ONNXRuntimeError:
                logging.error(traceback.format_exc())
                preds = []
                #logging.warning(traceback.format_exc())
                logging.warning("input wav is silence or noise")
                preds = ['']
            else:
                preds = self.decode(am_scores, valid_token_lens)
@@ -62,10 +64,10 @@
        return asr_res
    def load_data(self,
                  wav_content: Union[str, np.ndarray, List[str]]) -> List:
                  wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
        def load_wav(path: str) -> np.ndarray:
            waveform, _ = librosa.load(path, sr=None)
            return waveform[None, ...]
            waveform, _ = librosa.load(path, sr=fs)
            return waveform
        if isinstance(wav_content, np.ndarray):
            return [wav_content]
@@ -139,13 +141,4 @@
        # text = self.tokenizer.tokens2text(token)
        return text
if __name__ == '__main__':
    # Smoke-test entry point: load a Paraformer ONNX model and transcribe the
    # bundled example wav, printing the recognition result.
    # NOTE(review): model_dir is a machine-specific absolute path — replace with
    # a CLI argument (e.g. sys.argv) before this is used outside the author's box.
    model_dir = "/home/zhifu.gzf/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
    model = Paraformer(model_dir)
    # Example audio ships inside the model directory.
    wav_file = os.path.join(model_dir, 'example/asr_example.wav')
    result = model(wav_file)
    print(result)