| | |
| | | if quantize: |
| | | model_file = os.path.join(model_dir, "model_quant.onnx") |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | print(".onnx does not exist, begin to export onnx") |
| | | try: |
| | | from funasr import AutoModel |
| | | except: |
| | |
| | | plt.savefig(plotname, bbox_inches="tight") |
| | | |
| | | def load_data(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List: |
def convert_to_wav(input_path, output_path):
    """Decode ``input_path`` as MP3 and write it to ``output_path`` as WAV.

    Args:
        input_path: Path of the source audio file; it is decoded with
            ``AudioSegment.from_mp3``.
        output_path: Destination path of the exported WAV file.

    Raises:
        Exception: Whatever pydub raised while decoding or exporting. The
            failure is printed and then re-raised so the caller does not go
            on to read a missing (or stale, pre-existing) file at
            ``output_path``.
    """
    # Imported inside the function so pydub is only needed when a
    # conversion is actually requested.
    from pydub import AudioSegment

    try:
        audio = AudioSegment.from_mp3(input_path)
        audio.export(output_path, format="wav")
    except Exception as e:
        print(f"转换失败:{e}")
        # Propagate instead of swallowing: continuing would make the caller
        # load an output file that this call did not produce.
        raise
    else:
        print("音频文件为mp3格式,已转换为wav格式")
| | | |
def load_wav(path: str) -> np.ndarray:
    """Return the waveform that ``librosa.load`` reads for ``path``.

    A path that does not end in ``.wav`` (case-insensitive) is first
    converted with ``convert_to_wav`` into a file with the same stem and a
    ``.wav`` extension, and that converted file is the one loaded.

    Args:
        path: Path of the audio file to load.

    Returns:
        The first element of the ``librosa.load`` result, loaded with
        ``sr=fs`` where ``fs`` comes from the enclosing scope.
    """
    if path.lower().endswith('.wav'):
        wav_path = path
    else:
        import os

        # Convert the non-wav input into a wav file next to the original.
        stem, _ext = os.path.splitext(path)
        wav_path = stem + '.wav'
        convert_to_wav(path, wav_path)

    samples, _ = librosa.load(wav_path, sr=fs)
    return samples
| | | |
| | |
| | | model_eb_file = os.path.join(model_dir, "model_eb.onnx") |
| | | |
| | | if not (os.path.exists(model_eb_file) and os.path.exists(model_bb_file)): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | print(".onnx does not exist, begin to export onnx") |
| | | try: |
| | | from funasr import AutoModel |
| | | except: |
| | |
| | | self.pred_bias = config["model_conf"]["predictor_bias"] |
| | | else: |
| | | self.pred_bias = 0 |
| | | if "lang" in config: |
| | | self.language = config["lang"] |
| | | else: |
| | | self.language = None |
| | | |
| | | def __call__( |
| | | self, wav_content: Union[str, np.ndarray, List[str]], hotwords: str, **kwargs |
| | |
| | | # ) -> List: |
| | | # make hotword list |
| | | hotwords, hotwords_length = self.proc_hotword(hotwords) |
| | | # import pdb; pdb.set_trace() |
| | | [bias_embed] = self.eb_infer(hotwords, hotwords_length) |
| | | # index from bias_embed |
| | | bias_embed = bias_embed.transpose(1, 0, 2) |
| | |
| | | return np.array(hotwords) |
| | | |
| | | hotword_int = [word_map(i) for i in hotwords] |
| | | # import pdb; pdb.set_trace() |
| | | |
| | | hotword_int.append(np.array([1])) |
| | | hotwords = pad_list(hotword_int, pad_value=0, max_len=10) |
| | | # import pdb; pdb.set_trace() |
| | | |
| | | return hotwords, hotwords_length |
| | | |
| | | def bb_infer( |