| | |
| | | |
| | | |
class ContextualEmbedderExport2(ContextualEmbedderExport):
    """Export wrapper around a contextual model's hotword embedder.

    Differs from the parent only in what ``__init__`` wires up: it takes the
    embedding from ``model.bias_embed`` and switches ``model.bias_encoder``
    to ``batch_first = False``.
    """

    def __init__(self, model, **kwargs):
        """Bind the embedder pieces of ``model`` for export.

        Args:
            model: Source model; must expose ``bias_embed`` and
                ``bias_encoder`` attributes.
            **kwargs: Accepted for interface compatibility; not read here.
        """
        super().__init__(model)
        self.embedding = model.bias_embed
        # NOTE: this mutates the encoder on the caller's model in place.
        model.bias_encoder.batch_first = False
| | |
| | | model.decoder = decoder_class(model.decoder, onnx=is_onnx) |
| | | |
| | | from funasr.utils.torch_function import sequence_mask |
| | | |
| | | model.make_pad_mask = sequence_mask(kwargs["max_seq_len"], flip=False) |
| | | model.feats_dim = 560 |
| | | |
| | | import copy |
| | | |
| | | backbone_model = copy.copy(model) |
| | | |
| | | # backbone |
| | | backbone_model.forward = types.MethodType(export_backbone_forward, backbone_model) |
| | | backbone_model.export_dummy_inputs = types.MethodType(export_backbone_dummy_inputs, backbone_model) |
| | | backbone_model.export_input_names = types.MethodType(export_backbone_input_names, backbone_model) |
| | | backbone_model.export_output_names = types.MethodType(export_backbone_output_names, backbone_model) |
| | | backbone_model.export_dynamic_axes = types.MethodType(export_backbone_dynamic_axes, backbone_model) |
| | | backbone_model.export_dummy_inputs = types.MethodType( |
| | | export_backbone_dummy_inputs, backbone_model |
| | | ) |
| | | backbone_model.export_input_names = types.MethodType( |
| | | export_backbone_input_names, backbone_model |
| | | ) |
| | | backbone_model.export_output_names = types.MethodType( |
| | | export_backbone_output_names, backbone_model |
| | | ) |
| | | backbone_model.export_dynamic_axes = types.MethodType( |
| | | export_backbone_dynamic_axes, backbone_model |
| | | ) |
| | | backbone_model.export_name = types.MethodType(export_backbone_name, backbone_model) |
| | | |
| | | return backbone_model, embedder_model |
| | | |
| | | |
| | | def export_backbone_forward( |
| | | self, |
| | |
| | | |
| | | return decoder_out, pre_token_length |
| | | |
| | | |
def export_backbone_dummy_inputs(self):
    """Build the example inputs used when exporting the backbone.

    Returns:
        A ``(speech, speech_lengths, bias_embed)`` tuple where

        * ``speech`` is a random float tensor of shape
          ``(2, 30, self.feats_dim)``,
        * ``speech_lengths`` is the int32 tensor ``[6, 30]``,
        * ``bias_embed`` is a random float tensor of shape ``(2, 1, 512)``.
    """
    speech = torch.randn(2, 30, self.feats_dim)
    speech_lengths = torch.tensor([6, 30], dtype=torch.int32)
    # NOTE(review): 512 is hard-coded; presumably the hotword embedding
    # width of the bias encoder -- confirm against the model config.
    bias_embed = torch.randn(2, 1, 512)
    return (speech, speech_lengths, bias_embed)
| | | |
| | | |
def export_backbone_input_names(self):
    """Return the input names of the exported backbone graph.

    The order matches the tuple produced by ``export_backbone_dummy_inputs``.
    """
    return ["speech", "speech_lengths", "bias_embed"]
| | | |
| | | |
def export_backbone_output_names(self):
    """Return the output names of the exported backbone graph."""
    return ["logits", "token_num"]
| | | |
| | | |
def export_backbone_dynamic_axes(self):
    """Return the dynamic-axis map for the exported backbone graph.

    Maps each tensor name to ``{axis_index: symbolic_name}`` for the axes
    that are declared dynamic. ``token_num`` has no entry.
    """
    return {
        "speech": {0: "batch_size", 1: "feats_length"},
        "speech_lengths": {0: "batch_size"},
        "bias_embed": {0: "batch_size", 1: "num_hotwords"},
        "logits": {0: "batch_size", 1: "logits_length"},
    }
| | | |
| | | |
def export_backbone_name(self):
    """Return the file name used for the exported backbone model."""
    return "model.onnx"