From 0fa815632101cc73ea78363d80bf4a51facb8a7f Mon Sep 17 00:00:00 2001
From: onlybetheone <iriszhangchong@gmail.com>
Date: Wed, 28 Dec 2022 18:02:25 +0800
Subject: [PATCH] update inverse_text_normalization/zh into fun_text_processing, update setup.py
---
fun_text_processing/inverse_text_normalization/zh/taggers/word.py | 4 ++--
fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py | 2 +-
setup.py | 2 +-
fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py | 5 +++--
4 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py b/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
index e8ae253..4ceb806 100755
--- a/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
+++ b/fun_text_processing/inverse_text_normalization/zh/taggers/tokenize_and_classify.py
@@ -28,6 +28,7 @@
GraphFst,
delete_extra_space,
delete_space,
+ insert_space,
generator_main,
)
from pynini.lib import pynutil
@@ -94,10 +95,10 @@
punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }")
token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
token_plus_punct = (
- pynini.closure(punct + pynutil.insert(" ")) + token + pynini.closure(pynutil.insert(" ") + punct)
+ pynini.closure(punct + insert_space) + token + pynini.closure(insert_space + punct)
)
- graph = token_plus_punct + pynini.closure(delete_extra_space + token_plus_punct)
+ graph = token_plus_punct + pynini.closure(insert_space + token_plus_punct)
graph = delete_space + graph + delete_space
self.fst = graph.optimize()
diff --git a/fun_text_processing/inverse_text_normalization/zh/taggers/word.py b/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
index 315b2d9..51a83db 100755
--- a/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
+++ b/fun_text_processing/inverse_text_normalization/zh/taggers/word.py
@@ -11,7 +11,7 @@
# limitations under the License.
import pynini
-from fun_text_processing.inverse_text_normalization.zh.graph_utils import DAMO_NOT_SPACE, GraphFst
+from fun_text_processing.inverse_text_normalization.zh.graph_utils import DAMO_NOT_SPACE, DAMO_CHAR, GraphFst
from pynini.lib import pynutil
@@ -23,5 +23,5 @@
def __init__(self):
super().__init__(name="word", kind="classify")
- word = pynutil.insert("name: \"") + pynini.closure(DAMO_NOT_SPACE, 1) + pynutil.insert("\"")
+ word = pynutil.insert("name: \"") + DAMO_NOT_SPACE + pynutil.insert("\"")
self.fst = word.optimize()
diff --git a/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py b/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
index 005e148..25133e9 100755
--- a/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
+++ b/fun_text_processing/inverse_text_normalization/zh/verbalizers/verbalize_final.py
@@ -37,5 +37,5 @@
+ delete_space
+ pynutil.delete("}")
)
- graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space
+ graph = delete_space + pynini.closure(graph + delete_space) + graph + delete_space
self.fst = graph
diff --git a/setup.py b/setup.py
index e17c6ae..05db779 100644
--- a/setup.py
+++ b/setup.py
@@ -124,7 +124,7 @@
long_description=open(os.path.join(dirname, "README.md"), encoding="utf-8").read(),
long_description_content_type="text/markdown",
license="The MIT License",
- packages=find_packages(include=["funasr*"]),
+ packages=find_packages(include=["funasr*", "fun_text_processing*"]),
package_data={"funasr": ["version.txt"]},
install_requires=install_requires,
setup_requires=setup_requires,
--
Gitblit v1.9.1