From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Reformat zh inverse_text_normalization graph_utils.py (quote style, line wrapping, blank lines)

---
 fun_text_processing/inverse_text_normalization/zh/graph_utils.py |   64 +++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 17 deletions(-)

diff --git a/fun_text_processing/inverse_text_normalization/zh/graph_utils.py b/fun_text_processing/inverse_text_normalization/zh/graph_utils.py
index 5349863..9e213b8 100755
--- a/fun_text_processing/inverse_text_normalization/zh/graph_utils.py
+++ b/fun_text_processing/inverse_text_normalization/zh/graph_utils.py
@@ -1,4 +1,3 @@
-
 import os
 import string
 from pathlib import Path
@@ -20,10 +19,10 @@
 DAMO_ALNUM = pynini.union(DAMO_DIGIT, DAMO_ALPHA).optimize()
 
 DAMO_HEX = pynini.union(*string.hexdigits).optimize()
-DAMO_NON_BREAKING_SPACE = u"\u00A0"
+DAMO_NON_BREAKING_SPACE = "\u00A0"
 
 DAMO_SPACE = " "
-DAMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", u"\u00A0").optimize()
+DAMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", "\u00A0").optimize()
 DAMO_NOT_SPACE = pynini.difference(DAMO_CHAR, DAMO_WHITE_SPACE).optimize()
 DAMO_NOT_QUOTE = pynini.difference(DAMO_CHAR, r'"').optimize()
 
@@ -39,30 +38,55 @@
 
 delete_preserve_order = pynini.closure(
     pynutil.delete(" preserve_order: true")
-    | (pynutil.delete(" field_order: \"") + DAMO_NOT_QUOTE + pynutil.delete("\""))
+    | (pynutil.delete(' field_order: "') + DAMO_NOT_QUOTE + pynutil.delete('"'))
 )
 
 suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv"))
 # _v = pynini.union("a", "e", "i", "o", "u")
 _c = pynini.union(
-    "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z"
+    "b",
+    "c",
+    "d",
+    "f",
+    "g",
+    "h",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
 )
 _ies = DAMO_SIGMA + _c + pynini.cross("y", "ies")
 _es = DAMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es")
 _s = DAMO_SIGMA + pynutil.insert("s")
 
 graph_plural = plurals._priority_union(
-    suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, DAMO_SIGMA), DAMO_SIGMA), DAMO_SIGMA
+    suppletive,
+    plurals._priority_union(_ies, plurals._priority_union(_es, _s, DAMO_SIGMA), DAMO_SIGMA),
+    DAMO_SIGMA,
 ).optimize()
 
 SINGULAR_TO_PLURAL = graph_plural
 PLURAL_TO_SINGULAR = pynini.invert(graph_plural)
-TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)])
+TO_LOWER = pynini.union(
+    *[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]
+)
 TO_UPPER = pynini.invert(TO_LOWER)
 MIN_NEG_WEIGHT = -0.0001
 MIN_POS_WEIGHT = 0.0001
 
-def generator_main(file_name: str, graphs: Dict[str, 'pynini.FstLike']):
+
+def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]):
     """
     Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name.
 
@@ -74,7 +98,8 @@
     for rule, graph in graphs.items():
         exporter[rule] = graph.optimize()
     exporter.close()
-    print(f'Created {file_name}')
+    print(f"Created {file_name}")
+
 
 def get_plurals(fst):
     """
@@ -96,7 +121,7 @@
     return PLURAL_TO_SINGULAR @ fst
 
 
-def convert_space(fst) -> 'pynini.FstLike':
+def convert_space(fst) -> "pynini.FstLike":
     """
     Converts space to nonbreaking space.
     Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty"
@@ -105,7 +130,10 @@
         fst: input fst
     Returns output fst where breaking spaces are converted to non breaking spaces
     """
-    return fst @ pynini.cdrewrite(pynini.cross(DAMO_SPACE, DAMO_NON_BREAKING_SPACE), "", "", DAMO_SIGMA)
+    return fst @ pynini.cdrewrite(
+        pynini.cross(DAMO_SPACE, DAMO_NON_BREAKING_SPACE), "", "", DAMO_SIGMA
+    )
+
 
 class GraphFst:
     """
@@ -124,9 +152,11 @@
         self._fst = None
         self.deterministic = deterministic
 
-        self.far_path = Path(os.path.dirname(__file__) + '/grammars/' + kind + '/' + name + '.far')
+        self.far_path = Path(os.path.dirname(__file__) + "/grammars/" + kind + "/" + name + ".far")
         if self.far_exist():
-            self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst()
+            self._fst = Far(
+                self.far_path, mode="r", arc_type="standard", far_type="default"
+            ).get_fst()
 
     def far_exist(self) -> bool:
         """
@@ -135,14 +165,14 @@
         return self.far_path.exists()
 
     @property
-    def fst(self) -> 'pynini.FstLike':
+    def fst(self) -> "pynini.FstLike":
         return self._fst
 
     @fst.setter
     def fst(self, fst):
         self._fst = fst
 
-    def add_tokens(self, fst) -> 'pynini.FstLike':
+    def add_tokens(self, fst) -> "pynini.FstLike":
         """
         Wraps class name around to given fst
 
@@ -154,7 +184,7 @@
         """
         return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }")
 
-    def delete_tokens(self, fst) -> 'pynini.FstLike':
+    def delete_tokens(self, fst) -> "pynini.FstLike":
         """
         Deletes class name wrap around output of given fst
 
@@ -173,4 +203,4 @@
             + delete_space
             + pynutil.delete("}")
         )
-        return res @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", DAMO_SIGMA)
+        return res @ pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)

--
Gitblit v1.9.1