From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Reformat id/graph_utils.py: normalize string quotes, drop u"" prefixes, wrap long lines

---
 fun_text_processing/inverse_text_normalization/id/graph_utils.py |   60 ++++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/fun_text_processing/inverse_text_normalization/id/graph_utils.py b/fun_text_processing/inverse_text_normalization/id/graph_utils.py
index ccc9fa9..81a4da9 100644
--- a/fun_text_processing/inverse_text_normalization/id/graph_utils.py
+++ b/fun_text_processing/inverse_text_normalization/id/graph_utils.py
@@ -18,9 +18,9 @@
 DAMO_ALPHA = pynini.union(DAMO_LOWER, DAMO_UPPER).optimize()
 DAMO_ALNUM = pynini.union(DAMO_DIGIT, DAMO_ALPHA).optimize()
 DAMO_HEX = pynini.union(*string.hexdigits).optimize()
-DAMO_NON_BREAKING_SPACE = u"\u00A0"
+DAMO_NON_BREAKING_SPACE = "\u00A0"
 DAMO_SPACE = " "
-DAMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", u"\u00A0").optimize()
+DAMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", "\u00A0").optimize()
 DAMO_NOT_SPACE = pynini.difference(DAMO_CHAR, DAMO_WHITE_SPACE).optimize()
 DAMO_NOT_QUOTE = pynini.difference(DAMO_CHAR, r'"').optimize()
 
@@ -35,31 +35,55 @@
 delete_extra_space = pynini.cross(pynini.closure(DAMO_WHITE_SPACE, 1), " ")
 delete_preserve_order = pynini.closure(
     pynutil.delete(" preserve_order: true")
-    | (pynutil.delete(" field_order: \"") + DAMO_NOT_QUOTE + pynutil.delete("\""))
+    | (pynutil.delete(' field_order: "') + DAMO_NOT_QUOTE + pynutil.delete('"'))
 )
 
 suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv"))
 # _v = pynini.union("a", "e", "i", "o", "u")
 _c = pynini.union(
-    "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z"
+    "b",
+    "c",
+    "d",
+    "f",
+    "g",
+    "h",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
 )
 _ies = DAMO_SIGMA + _c + pynini.cross("y", "ies")
 _es = DAMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es")
 _s = DAMO_SIGMA + pynutil.insert("s")
 
 graph_plural = plurals._priority_union(
-    suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, DAMO_SIGMA), DAMO_SIGMA), DAMO_SIGMA
+    suppletive,
+    plurals._priority_union(_ies, plurals._priority_union(_es, _s, DAMO_SIGMA), DAMO_SIGMA),
+    DAMO_SIGMA,
 ).optimize()
 
 SINGULAR_TO_PLURAL = graph_plural
 PLURAL_TO_SINGULAR = pynini.invert(graph_plural)
-TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)])
+TO_LOWER = pynini.union(
+    *[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]
+)
 TO_UPPER = pynini.invert(TO_LOWER)
 MIN_NEG_WEIGHT = -0.0001
 MIN_POS_WEIGHT = 0.0001
 
 
-def generator_main(file_name: str, graphs: Dict[str, 'pynini.FstLike']):
+def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]):
     """
     Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name.
 
@@ -71,7 +95,7 @@
     for rule, graph in graphs.items():
         exporter[rule] = graph.optimize()
     exporter.close()
-    print(f'Created {file_name}')
+    print(f"Created {file_name}")
 
 
 def get_plurals(fst):
@@ -98,7 +122,7 @@
     return PLURAL_TO_SINGULAR @ fst
 
 
-def convert_space(fst) -> 'pynini.FstLike':
+def convert_space(fst) -> "pynini.FstLike":
     """
     Converts space to nonbreaking space.
     Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty"
@@ -109,7 +133,9 @@
 
     Returns output fst where breaking spaces are converted to non breaking spaces
     """
-    return fst @ pynini.cdrewrite(pynini.cross(DAMO_SPACE, DAMO_NON_BREAKING_SPACE), "", "", DAMO_SIGMA)
+    return fst @ pynini.cdrewrite(
+        pynini.cross(DAMO_SPACE, DAMO_NON_BREAKING_SPACE), "", "", DAMO_SIGMA
+    )
 
 
 class GraphFst:
@@ -129,9 +155,11 @@
         self._fst = None
         self.deterministic = deterministic
 
-        self.far_path = Path(os.path.dirname(__file__) + '/grammars/' + kind + '/' + name + '.far')
+        self.far_path = Path(os.path.dirname(__file__) + "/grammars/" + kind + "/" + name + ".far")
         if self.far_exist():
-            self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst()
+            self._fst = Far(
+                self.far_path, mode="r", arc_type="standard", far_type="default"
+            ).get_fst()
 
     def far_exist(self) -> bool:
         """
@@ -140,14 +168,14 @@
         return self.far_path.exists()
 
     @property
-    def fst(self) -> 'pynini.FstLike':
+    def fst(self) -> "pynini.FstLike":
         return self._fst
 
     @fst.setter
     def fst(self, fst):
         self._fst = fst
 
-    def add_tokens(self, fst) -> 'pynini.FstLike':
+    def add_tokens(self, fst) -> "pynini.FstLike":
         """
         Wraps class name around to given fst
 
@@ -159,7 +187,7 @@
         """
         return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }")
 
-    def delete_tokens(self, fst) -> 'pynini.FstLike':
+    def delete_tokens(self, fst) -> "pynini.FstLike":
         """
         Deletes class name wrap around output of given fst
 
@@ -178,4 +206,4 @@
             + delete_space
             + pynutil.delete("}")
         )
-        return res @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", DAMO_SIGMA)
+        return res @ pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)

--
Gitblit v1.9.1