From c6361cc2a7e99be802d7d7e81a93e874f0faf5cd Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 15 Jan 2024 14:15:53 +0800
Subject: [PATCH] funasr1.0

---
 funasr/models/monotonic_aligner/model.py |   22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/funasr/models/monotonic_aligner/model.py b/funasr/models/monotonic_aligner/model.py
index a0d745f..6309732 100644
--- a/funasr/models/monotonic_aligner/model.py
+++ b/funasr/models/monotonic_aligner/model.py
@@ -41,15 +41,15 @@
         super().__init__()
 
         if specaug is not None:
-            specaug_class = tables.specaug_classes.get(specaug.lower())
+            specaug_class = tables.specaug_classes.get(specaug)
             specaug = specaug_class(**specaug_conf)
         if normalize is not None:
-            normalize_class = tables.normalize_classes.get(normalize.lower())
+            normalize_class = tables.normalize_classes.get(normalize)
             normalize = normalize_class(**normalize_conf)
-        encoder_class = tables.encoder_classes.get(encoder.lower())
+        encoder_class = tables.encoder_classes.get(encoder)
         encoder = encoder_class(input_size=input_size, **encoder_conf)
         encoder_output_size = encoder.output_size()
-        predictor_class = tables.predictor_classes.get(predictor.lower())
+        predictor_class = tables.predictor_classes.get(predictor)
         predictor = predictor_class(**predictor_conf)
         self.specaug = specaug
         self.normalize = normalize
@@ -166,7 +166,8 @@
         meta_data["extract_feat"] = f"{time3 - time2:0.3f}"
         meta_data["batch_data_time"] = speech_lengths.sum().item() * frontend.frame_shift * frontend.lfr_n / 1000
             
-        speech.to(device=kwargs["device"]), speech_lengths.to(device=kwargs["device"])
+        speech = speech.to(device=kwargs["device"])
+        speech_lengths = speech_lengths.to(device=kwargs["device"])
 
         # Encoder
         encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
@@ -188,9 +189,12 @@
             text_postprocessed, time_stamp_postprocessed, _ = postprocess_utils.sentence_postprocess(token, timestamp)
             result_i = {"key": key[i], "text": text_postprocessed,
                                 "timestamp": time_stamp_postprocessed,
-                                }    
-            # ibest_writer["token"][key[i]] = " ".join(token)
-            ibest_writer["timestamp_list"][key[i]] = time_stamp_postprocessed
-            ibest_writer["timestamp_str"][key[i]] = timestamp_str
+                                }
             results.append(result_i)
+
+            if ibest_writer:
+                # ibest_writer["token"][key[i]] = " ".join(token)
+                ibest_writer["timestamp_list"][key[i]] = time_stamp_postprocessed
+                ibest_writer["timestamp_str"][key[i]] = timestamp_str
+            
         return results, meta_data
\ No newline at end of file

--
Gitblit v1.9.1