onnx (#1473)
* qwenaudio qwenaudiochat
* qwenaudio qwenaudiochat
* whisper
* whisper
* llm
* llm
* llm
* llm
* llm
* llm
* llm
* llm
* export onnx
* export onnx
* export onnx
* dingding
* dingding
* llm
* doc
* onnx
* onnx
* onnx
* onnx
| | |
| | | export_dir = export_utils.export_onnx( |
| | | model=model, |
| | | data_in=data_list, |
| | | quantize=quantize, |
| | | fallback_num=fallback_num, |
| | | calib_num=calib_num, |
| | | opset_version=opset_version, |
| | | **kwargs) |
| | | else: |
| | | export_dir = export_utils.export_torchscripts( |
| | | model=model, |
| | | data_in=data_list, |
| | | quantize=quantize, |
| | | fallback_num=fallback_num, |
| | | calib_num=calib_num, |
| | | opset_version=opset_version, |
| | | **kwargs) |
| | | |
| | | return export_dir |
| | |
| | | decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export") |
| | | self.decoder = decoder_class(self.decoder, onnx=is_onnx) |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | if is_onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | self.forward = self.export_forward |
| | | |
| | |
| | | self.model = model |
| | | self._output_size = model._output_size |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | if onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport |
| | | |
| | |
| | | ): |
| | | super().__init__() |
| | | # self.embed = model.embed #Embedding(model.embed, max_seq_len) |
| | | from funasr.utils.torch_function import MakePadMask |
| | | |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | self.model = model |
| | | if onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport |
| | |
| | | super().__init__() |
| | | # self.embed = model.embed #Embedding(model.embed, max_seq_len) |
| | | self.model = model |
| | | from funasr.utils.torch_function import MakePadMask |
| | | |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | self.model = model |
| | | if onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionSANMDecoderExport |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionCrossAttExport |
| | |
| | | # self.embed = model.embed #Embedding(model.embed, max_seq_len) |
| | | self.model = model |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | self.model = model |
| | | if onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | |
| | | from funasr.models.transformer.decoder import DecoderLayerExport |
| | |
| | | decoder_class = tables.decoder_classes.get(kwargs["decoder"]+"Export") |
| | | self.decoder = decoder_class(self.decoder, onnx=is_onnx) |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | |
| | | if is_onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | self.forward = self.export_forward |
| | | |
| | | return self |
| | |
| | | decoder_class = tables.decoder_classes.get(kwargs["decoder"] + "Export") |
| | | self.decoder = decoder_class(self.decoder, onnx=is_onnx) |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | if is_onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | import copy |
| | | import types |
| | |
| | | self.feats_dim = feats_dim |
| | | self._output_size = model._output_size |
| | | |
| | | from funasr.utils.torch_function import MakePadMask |
| | | |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | if onnx: |
| | | self.make_pad_mask = MakePadMask(max_seq_len, flip=False) |
| | | else: |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | |
| | | self.make_pad_mask = sequence_mask(max_seq_len, flip=False) |
| | | |
| | | from funasr.models.sanm.attention import MultiHeadedAttentionSANMExport |
| | | if hasattr(model, 'encoders0'): |
| | |
| | | |
| | | model = AutoModel(model=model_dir) |
| | | model_dir = model.export(type="onnx", quantize=quantize) |
| | | config_file = os.path.join(model_dir, 'vad.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'vad.mvn') |
| | | config_file = os.path.join(model_dir, 'config.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'am.mvn') |
| | | config = read_yaml(config_file) |
| | | |
| | | self.frontend = WavFrontend( |
| | |
| | | ) |
| | | self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads) |
| | | self.batch_size = batch_size |
| | | self.vad_scorer = E2EVadModel(config["vad_post_conf"]) |
| | | self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"] |
| | | self.vad_scorer = E2EVadModel(config["model_conf"]) |
| | | self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"] |
| | | self.encoder_conf = config["encoder_conf"] |
| | | |
| | | def prepare_cache(self, in_cache: list = []): |
| | |
| | | model = AutoModel(model=model_dir) |
| | | model_dir = model.export(type="onnx", quantize=quantize) |
| | | |
| | | config_file = os.path.join(model_dir, 'vad.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'vad.mvn') |
| | | config_file = os.path.join(model_dir, 'config.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'am.mvn') |
| | | config = read_yaml(config_file) |
| | | |
| | | self.frontend = WavFrontendOnline( |
| | |
| | | ) |
| | | self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads) |
| | | self.batch_size = batch_size |
| | | self.vad_scorer = E2EVadModel(config["vad_post_conf"]) |
| | | self.max_end_sil = max_end_sil if max_end_sil is not None else config["vad_post_conf"]["max_end_silence_time"] |
| | | self.vad_scorer = E2EVadModel(config["model_conf"]) |
| | | self.max_end_sil = max_end_sil if max_end_sil is not None else config["model_conf"]["max_end_silence_time"] |
| | | self.encoder_conf = config["encoder_conf"] |
| | | |
| | | def prepare_cache(self, in_cache: list = []): |