From 77045e7bb78d4b8a82f96130f9d84e356a32d5c5 Mon Sep 17 00:00:00 2001
From: aky15 <ankeyu.aky@11.17.44.249>
Date: Tue, 09 May 2023 11:16:07 +0800
Subject: [PATCH] rnnt bug fix
---
funasr/bin/punctuation_infer_vadrealtime.py | 34 ++++++----------------------------
1 file changed, 6 insertions(+), 28 deletions(-)
diff --git a/funasr/bin/punctuation_infer_vadrealtime.py b/funasr/bin/punctuation_infer_vadrealtime.py
index d6cc153..81f9d7a 100644
--- a/funasr/bin/punctuation_infer_vadrealtime.py
+++ b/funasr/bin/punctuation_infer_vadrealtime.py
@@ -23,7 +23,7 @@
from funasr.utils import config_argparse
from funasr.utils.types import str2triple_str
from funasr.utils.types import str_or_none
-from funasr.punctuation.text_preprocessor import split_to_mini_sentence
+from funasr.datasets.preprocessor import split_to_mini_sentence
class Text2Punc:
@@ -69,6 +69,7 @@
precache = "".join(cache)
else:
precache = ""
+ cache = []
data = {"text": precache + text}
result = self.preprocessor(data=data, uid="12938712838719")
split_text = self.preprocessor.pop_split_text_data(result)
@@ -225,7 +226,7 @@
):
results = []
split_size = 10
-
+ cache_in = param_dict["cache"]
if raw_inputs != None:
line = raw_inputs.strip()
key = "demo"
@@ -233,35 +234,12 @@
item = {'key': key, 'value': ""}
results.append(item)
return results
- #import pdb;pdb.set_trace()
- result, _, cache = text2punc(line, cache)
- item = {'key': key, 'value': result, 'cache': cache}
+ result, _, cache = text2punc(line, cache_in)
+ param_dict["cache"] = cache
+ item = {'key': key, 'value': result}
results.append(item)
return results
- for inference_text, _, _ in data_path_and_name_and_type:
- with open(inference_text, "r", encoding="utf-8") as fin:
- for line in fin:
- line = line.strip()
- segs = line.split("\t")
- if len(segs) != 2:
- continue
- key = segs[0]
- if len(segs[1]) == 0:
- continue
- result, _ = text2punc(segs[1])
- item = {'key': key, 'value': result}
- results.append(item)
- output_path = output_dir_v2 if output_dir_v2 is not None else output_dir
- if output_path != None:
- output_file_name = "infer.out"
- Path(output_path).mkdir(parents=True, exist_ok=True)
- output_file_path = (Path(output_path) / output_file_name).absolute()
- with open(output_file_path, "w", encoding="utf-8") as fout:
- for item_i in results:
- key_out = item_i["key"]
- value_out = item_i["value"]
- fout.write(f"{key_out}\t{value_out}\n")
return results
return _forward
--
Gitblit v1.9.1