From 4ba1011b42e041ee1d71448eefd7ef2e7bd61bb6 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Fri, 31 Mar 2023 15:31:26 +0800
Subject: [PATCH] export

---
 funasr/bin/asr_inference_uniasr.py |   33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/funasr/bin/asr_inference_uniasr.py b/funasr/bin/asr_inference_uniasr.py
index c50bf17..1286bc2 100644
--- a/funasr/bin/asr_inference_uniasr.py
+++ b/funasr/bin/asr_inference_uniasr.py
@@ -37,16 +37,13 @@
 from funasr.models.frontend.wav_frontend import WavFrontend
 
 
-header_colors = '\033[95m'
-end_colors = '\033[0m'
-
 
 class Speech2Text:
     """Speech2Text class
 
     Examples:
         >>> import soundfile
-        >>> speech2text = Speech2Text("asr_config.yml", "asr.pth")
+        >>> speech2text = Speech2Text("asr_config.yml", "asr.pb")
         >>> audio, rate = soundfile.read("speech.wav")
         >>> speech2text(audio)
         [(text, token, token_int, hypothesis object), ...]
@@ -261,6 +258,7 @@
 
             # Change integer-ids to tokens
             token = self.converter.ids2tokens(token_int)
+            token = list(filter(lambda x: x != "<gbg>", token))
 
             if self.tokenizer is not None:
                 text = self.tokenizer.tokens2text(token)
@@ -398,6 +396,19 @@
     else:
         device = "cpu"
     
+    if param_dict is not None and "decoding_model" in param_dict:
+        if param_dict["decoding_model"] == "fast":
+            decoding_ind = 0
+            decoding_mode = "model1"
+        elif param_dict["decoding_model"] == "normal":
+            decoding_ind = 0
+            decoding_mode = "model2"
+        elif param_dict["decoding_model"] == "offline":
+            decoding_ind = 1
+            decoding_mode = "model2"
+        else:
+            raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
+
     # 1. Set random-seed
     set_all_random_seed(seed)
 
@@ -440,18 +451,6 @@
             if isinstance(raw_inputs, torch.Tensor):
                 raw_inputs = raw_inputs.numpy()
             data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
-        if param_dict is not None and "decoding_model" in param_dict:
-            if param_dict["decoding_model"] == "fast":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model1"
-            elif param_dict["decoding_model"] == "normal":
-                speech2text.decoding_ind = 0
-                speech2text.decoding_mode = "model2"
-            elif param_dict["decoding_model"] == "offline":
-                speech2text.decoding_ind = 1
-                speech2text.decoding_mode = "model2"
-            else:
-                raise NotImplementedError("unsupported decoding model {}".format(param_dict["decoding_model"]))
         loader = ASRTask.build_streaming_iterator(
             data_path_and_name_and_type,
             dtype=dtype,
@@ -511,7 +510,7 @@
                     finish_count += 1
                     asr_utils.print_progress(finish_count / file_count)
                     if writer is not None:
-                        ibest_writer["text"][key] = text
+                        ibest_writer["text"][key] = text_postprocessed
         return asr_result_list
     
     return _forward

--
Gitblit v1.9.1