From 320c7ff2c2dfbce13ee01589a64b515bf2d7857b Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Fri, 29 Mar 2024 12:30:35 +0800
Subject: [PATCH] Dev gzf new (#1562)
---
funasr/models/fsmn_vad_streaming/model.py | 57 ++++++++++++---------------------------------------------
1 files changed, 12 insertions(+), 45 deletions(-)
diff --git a/funasr/models/fsmn_vad_streaming/model.py b/funasr/models/fsmn_vad_streaming/model.py
index d06db20..f1d4161 100644
--- a/funasr/models/fsmn_vad_streaming/model.py
+++ b/funasr/models/fsmn_vad_streaming/model.py
@@ -543,6 +543,11 @@
cache["frontend"] = {}
cache["prev_samples"] = torch.empty(0)
cache["encoder"] = {}
+
+ if kwargs.get("max_end_silence_time") is not None:
+ # Override the configured max_end_silence_time when the caller supplies one (stored on self.vad_opts).
+ self.vad_opts.max_end_silence_time = kwargs.get("max_end_silence_time")
+
windows_detector = WindowDetector(self.vad_opts.window_size_ms,
self.vad_opts.sil_to_speech_time_thres,
self.vad_opts.speech_to_sil_time_thres,
@@ -633,8 +638,8 @@
results = []
result_i = {"key": key[0], "value": segments}
- if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
- result_i = json.dumps(result_i)
+ # if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
+ # result_i = json.dumps(result_i)
results.append(result_i)
@@ -644,49 +649,11 @@
return results, meta_data
def export(self, **kwargs):
- is_onnx = kwargs.get("type", "onnx") == "onnx"
- encoder_class = tables.encoder_classes.get(kwargs["encoder"] + "Export")
- self.encoder = encoder_class(self.encoder, onnx=is_onnx)
- self.forward = self._export_forward
-
- return self
-
- def export_forward(self, feats: torch.Tensor, *args, **kwargs):
-
- scores, out_caches = self.encoder(feats, *args)
-
- return scores, out_caches
-
- def export_dummy_inputs(self, data_in=None, frame=30):
- if data_in is None:
- speech = torch.randn(1, frame, self.encoder_conf.get("input_dim"))
- else:
- speech = None # Undo
-
- cache_frames = self.encoder_conf.get("lorder") + self.encoder_conf.get("rorder") - 1
- in_cache0 = torch.randn(1, self.encoder_conf.get("proj_dim"), cache_frames, 1)
- in_cache1 = torch.randn(1, self.encoder_conf.get("proj_dim"), cache_frames, 1)
- in_cache2 = torch.randn(1, self.encoder_conf.get("proj_dim"), cache_frames, 1)
- in_cache3 = torch.randn(1, self.encoder_conf.get("proj_dim"), cache_frames, 1)
-
- return (speech, in_cache0, in_cache1, in_cache2, in_cache3)
-
- def export_input_names(self):
- return ['speech', 'in_cache0', 'in_cache1', 'in_cache2', 'in_cache3']
-
- def export_output_names(self):
- return ['logits', 'out_cache0', 'out_cache1', 'out_cache2', 'out_cache3']
-
- def export_dynamic_axes(self):
- return {
- 'speech': {
- 1: 'feats_length'
- },
- }
-
- def export_name(self, ):
- return "model.onnx"
-
+
+ from .export_meta import export_rebuild_model
+ models = export_rebuild_model(model=self, **kwargs)
+ return models
+
def DetectCommonFrames(self, cache: dict = {}) -> int:
if cache["stats"].vad_state_machine == VadStateMachine.kVadInStateEndPointDetected:
return 0
--
Gitblit v1.9.1