python/FunASR-XL.git

			@@ -1,4 +1,6 @@
			# -*- encoding: utf-8 -*-
			# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
			# MIT License (https://opensource.org/licenses/MIT)

			import os.path
			from pathlib import Path
			@@ -19,11 +21,15 @@


			class Paraformer():
			"""
			Author: Speech Lab of DAMO Academy, Alibaba Group
			Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
			https://arxiv.org/abs/2206.08317
			"""
			def __init__(self, model_dir: Union[str, Path] = None,
			batch_size: int = 1,
			device_id: Union[str, int] = "-1",
			plot_timestamp_to: str = "",
			pred_bias: int = 1,
			quantize: bool = False,
			intra_op_num_threads: int = 4,
			):
			@@ -47,7 +53,10 @@
			self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
			self.batch_size = batch_size
			self.plot_timestamp_to = plot_timestamp_to
			self.pred_bias = pred_bias
			if "predictor_bias" in config['model_conf'].keys():
			self.pred_bias = config['model_conf']['predictor_bias']
			else:
			self.pred_bias = 0

			def __call__(self, wav_content: Union[str, np.ndarray, List[str]], **kwargs) -> List:
			waveform_list = self.load_data(wav_content, self.frontend.opts.frame_opts.samp_freq)