From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] 提交

---
 fun_text_processing/inverse_text_normalization/inverse_normalize.py |  128 ++++++++++++++++++++++++++++--------------
 1 file changed, 84 insertions(+), 44 deletions(-)

diff --git a/fun_text_processing/inverse_text_normalization/inverse_normalize.py b/fun_text_processing/inverse_text_normalization/inverse_normalize.py
index fe6aa31..2dffd5d 100644
--- a/fun_text_processing/inverse_text_normalization/inverse_normalize.py
+++ b/fun_text_processing/inverse_text_normalization/inverse_normalize.py
@@ -21,73 +21,102 @@
         overwrite_cache: set to True to overwrite .far files
     """
 
-    def __init__(self, lang: str = 'en', cache_dir: str = None, overwrite_cache: bool = False,
-                 enable_standalone_number: bool = True,
-                 enable_0_to_9: bool = True):
+    def __init__(
+        self,
+        lang: str = "en",
+        cache_dir: str = None,
+        overwrite_cache: bool = False,
+        enable_standalone_number: bool = True,
+        enable_0_to_9: bool = True,
+    ):
 
-        if lang == 'en':
-            from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
+        if lang == "en":
+            from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'id':
-            from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "id":
+            from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.id.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'ja':
-            from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "ja":
+            from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'es':
-            from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "es":
+            from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'pt':
-            from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "pt":
+            from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.pt.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'ru':
-            from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "ru":
+            from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.ru.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
 
-        elif lang == 'de':
-            from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "de":
+            from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.de.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
-        elif lang == 'fr':
-            from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "fr":
+            from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.fr.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
-        elif lang == 'vi':
-            from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "vi":
+            from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.vi.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
-        elif lang == 'ko':
-            from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "ko":
+            from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
-        elif lang == 'zh':
-            from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "zh":
+            from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.zh.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
-        elif lang == 'tl':
-            from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import ClassifyFst
+        elif lang == "tl":
+            from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import (
+                ClassifyFst,
+            )
             from fun_text_processing.inverse_text_normalization.tl.verbalizers.verbalize_final import (
                 VerbalizeFinalFst,
             )
@@ -129,51 +158,62 @@
 
 def str2bool(s, default=False):
     s = s.lower()
-    if s == 'true':
+    if s == "true":
         return True
-    elif s == 'false':
+    elif s == "false":
         return False
     else:
         return default
+
 
 def parse_args():
     parser = ArgumentParser()
     input = parser.add_mutually_exclusive_group()
     input.add_argument("--text", dest="input_string", help="input string", type=str)
     input.add_argument("--input_file", dest="input_file", help="input file path", type=str)
-    parser.add_argument('--output_file', dest="output_file", help="output file path", type=str)
+    parser.add_argument("--output_file", dest="output_file", help="output file path", type=str)
     parser.add_argument(
-        "--language", help="language", choices=['en', 'id', 'ja', 'de', 'es', 'pt', 'ru', 'fr', 'vi', 'ko', 'zh', 'tl'], default="en", type=str
+        "--language",
+        help="language",
+        choices=["en", "id", "ja", "de", "es", "pt", "ru", "fr", "vi", "ko", "zh", "tl"],
+        default="en",
+        type=str,
     )
-    parser.add_argument("--verbose", help="print info for debugging", action='store_true')
-    parser.add_argument("--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true")
+    parser.add_argument("--verbose", help="print info for debugging", action="store_true")
+    parser.add_argument(
+        "--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true"
+    )
     parser.add_argument(
         "--cache_dir",
         help="path to a dir with .far grammar file. Set to None to avoid using cache",
         default=None,
         type=str,
     )
-    parser.add_argument('--enable_standalone_number', type=str,
-                        default='True',
-                        help='enable standalone number')
-    parser.add_argument('--enable_0_to_9', type=str,
-                        default='True',
-                        help='enable convert number 0 to 9')
+    parser.add_argument(
+        "--enable_standalone_number", type=str, default="True", help="enable standalone number"
+    )
+    parser.add_argument(
+        "--enable_0_to_9", type=str, default="True", help="enable convert number 0 to 9"
+    )
     return parser.parse_args()
 
 
 if __name__ == "__main__":
     args = parse_args()
     start_time = perf_counter()
-    if args.language == 'ja':
-        inverse_normalizer = InverseNormalizer(lang=args.language, cache_dir=args.cache_dir, overwrite_cache=args.overwrite_cache,
-                enable_standalone_number=str2bool(args.enable_standalone_number),
-                enable_0_to_9=str2bool(args.enable_0_to_9))
+    if args.language == "ja":
+        inverse_normalizer = InverseNormalizer(
+            lang=args.language,
+            cache_dir=args.cache_dir,
+            overwrite_cache=args.overwrite_cache,
+            enable_standalone_number=str2bool(args.enable_standalone_number),
+            enable_0_to_9=str2bool(args.enable_0_to_9),
+        )
     else:
         inverse_normalizer = InverseNormalizer(
             lang=args.language, cache_dir=args.cache_dir, overwrite_cache=args.overwrite_cache
         )
-    print(f'Time to generate graph: {round(perf_counter() - start_time, 2)} sec')
+    print(f"Time to generate graph: {round(perf_counter() - start_time, 2)} sec")
 
     if args.input_string:
         print(inverse_normalizer.inverse_normalize(args.input_string, verbose=args.verbose))

--
Gitblit v1.9.1