From 3df109adfccedeb134dea4ba2ea9a2da89872048 Mon Sep 17 00:00:00 2001
From: Isuxiz Slidder <48672727+Isuxiz@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:51:52 +0800
Subject: [PATCH] Update model.py to fix "IndexError: index 1 is out of bounds for dimension 1 with size 0" (#2454)

---
 fun_text_processing/text_normalization/export_models.py |   84 +++++++++++++++++++++++++++++------------
 1 file changed, 59 insertions(+), 25 deletions(-)

diff --git a/fun_text_processing/text_normalization/export_models.py b/fun_text_processing/text_normalization/export_models.py
index 72c3ff8..09fab45 100644
--- a/fun_text_processing/text_normalization/export_models.py
+++ b/fun_text_processing/text_normalization/export_models.py
@@ -3,14 +3,23 @@
 from argparse import ArgumentParser
 from fun_text_processing.text_normalization.en.graph_utils import generator_main
 
+
 def parse_args():
     parser = ArgumentParser()
-    
+
     parser.add_argument(
-        "--language", help="language", choices=['de', 'en', 'es', 'ru', 'zh'], default="en", type=str
+        "--language",
+        help="language",
+        choices=["de", "en", "es", "ru", "zh"],
+        default="en",
+        type=str,
     )
     parser.add_argument(
-        "--input_case", help="input capitalization", choices=["lower_cased", "cased"], default="cased", type=str
+        "--input_case",
+        help="input capitalization",
+        choices=["lower_cased", "cased"],
+        default="cased",
+        type=str,
     )
     parser.add_argument(
         "--export_dir",
@@ -20,27 +29,53 @@
     )
     return parser.parse_args()
 
-def get_grammars(lang: str="en", input_case: str="cased"):
-  if lang=='de':
-    from fun_text_processing.text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.de.verbalizers.verbalize_final import VerbalizeFinalFst
-  elif lang=='en':
-    from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
-  elif lang=='es':
-    from fun_text_processing.text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.es.verbalizers.verbalize_final import VerbalizeFinalFst
-  elif lang=='ru':
-    from fun_text_processing.text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst
-  elif lang=='zh':
-    from fun_text_processing.text_normalization.zh.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.zh.verbalizers.verbalize_final import VerbalizeFinalFst
-  else:
-    from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
-    from fun_text_processing.text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst
 
-  return ClassifyFst(input_case=input_case).fst, VerbalizeFinalFst().fst
+def get_grammars(lang: str = "en", input_case: str = "cased"):
+    if lang == "de":
+        from fun_text_processing.text_normalization.de.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.de.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+    elif lang == "en":
+        from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.en.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+    elif lang == "es":
+        from fun_text_processing.text_normalization.es.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.es.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+    elif lang == "ru":
+        from fun_text_processing.text_normalization.ru.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.ru.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+    elif lang == "zh":
+        from fun_text_processing.text_normalization.zh.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.zh.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+    else:
+        from fun_text_processing.text_normalization.en.taggers.tokenize_and_classify import (
+            ClassifyFst,
+        )
+        from fun_text_processing.text_normalization.en.verbalizers.verbalize_final import (
+            VerbalizeFinalFst,
+        )
+
+    return ClassifyFst(input_case=input_case).fst, VerbalizeFinalFst().fst
+
 
 if __name__ == "__main__":
     args = parse_args()
@@ -54,5 +89,4 @@
     tagger_fst, verbalizer_fst = get_grammars(args.language, args.input_case)
     generator_main(tagger_far_file, {"tokenize_and_classify": tagger_fst})
     generator_main(verbalizer_far_file, {"verbalize": verbalizer_fst})
-    print(f'Time to generate graph: {round(perf_counter() - start_time, 2)} sec')
-
+    print(f"Time to generate graph: {round(perf_counter() - start_time, 2)} sec")

--
Gitblit v1.9.1