From dbbe718fc965fa80e67976d629b720dc1a8f1525 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 18 三月 2024 22:24:37 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR merge

---
 funasr/models/paraformer_streaming/model.py |   21 ++++++++++++---------
 1 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index 9bf5d39..5daa73a 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -235,8 +235,7 @@
         decoder_out_1st = None
         pre_loss_att = None
         if self.sampling_ratio > 0.0:
-            if self.step_cur < 2:
-                logging.info("enable sampler in paraformer, sampling_ratio: {}".format(self.sampling_ratio))
+
             if self.use_1st_decoder_loss:
                 sematic_embeds, decoder_out_1st, pre_loss_att = \
                     self.sampler_with_grad(encoder_out, encoder_out_lens, ys_pad,
@@ -246,8 +245,6 @@
                     self.sampler(encoder_out, encoder_out_lens, ys_pad,
                                  ys_pad_lens, pre_acoustic_embeds, scama_mask)
         else:
-            if self.step_cur < 2:
-                logging.info("disable sampler in paraformer, sampling_ratio: {}".format(self.sampling_ratio))
             sematic_embeds = pre_acoustic_embeds
         
         # 1. Forward decoder
@@ -534,6 +531,8 @@
         for i in range(n):
             kwargs["is_final"] = _is_final and i == n -1
             audio_sample_i = audio_sample[i*chunk_stride_samples:(i+1)*chunk_stride_samples]
+            if kwargs["is_final"] and len(audio_sample_i) < 960:
+                continue
 
             # extract fbank feats
             speech, speech_lengths = extract_fbank([audio_sample_i], data_type=kwargs.get("data_type", "sound"),
@@ -556,11 +555,15 @@
             self.init_cache(cache, **kwargs)
         
         if kwargs.get("output_dir"):
-            writer = DatadirWriter(kwargs.get("output_dir"))
-            ibest_writer = writer[f"{1}best_recog"]
+            if not hasattr(self, "writer"):
+                self.writer = DatadirWriter(kwargs.get("output_dir"))
+            ibest_writer = self.writer[f"{1}best_recog"]
             ibest_writer["token"][key[0]] = " ".join(tokens)
             ibest_writer["text"][key[0]] = text_postprocessed
-        
+
         return result, meta_data
-
-
+    
+    def export(self, **kwargs):
+        from .export_meta import export_rebuild_model
+        models = export_rebuild_model(model=self, **kwargs)
+        return models
\ No newline at end of file

--
Gitblit v1.9.1