From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Reformat inverse_normalize.py (double quotes, wrapped signatures and calls; no functional change)
---
fun_text_processing/inverse_text_normalization/inverse_normalize.py | 128 ++++++++++++++++++++++++++++--------------
1 file changed, 84 insertions(+), 44 deletions(-)
diff --git a/fun_text_processing/inverse_text_normalization/inverse_normalize.py b/fun_text_processing/inverse_text_normalization/inverse_normalize.py
index fe6aa31..2dffd5d 100644
--- a/fun_text_processing/inverse_text_normalization/inverse_normalize.py
+++ b/fun_text_processing/inverse_text_normalization/inverse_normalize.py
@@ -21,73 +21,102 @@
overwrite_cache: set to True to overwrite .far files
"""
- def __init__(self, lang: str = 'en', cache_dir: str = None, overwrite_cache: bool = False,
- enable_standalone_number: bool = True,
- enable_0_to_9: bool = True):
+ def __init__(
+ self,
+ lang: str = "en",
+ cache_dir: str = None,
+ overwrite_cache: bool = False,
+ enable_standalone_number: bool = True,
+ enable_0_to_9: bool = True,
+ ):
- if lang == 'en':
- from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
+ if lang == "en":
+ from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'id':
- from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "id":
+ from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.id.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'ja':
- from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "ja":
+ from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'es':
- from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "es":
+ from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'pt':
- from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "pt":
+ from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.pt.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'ru':
- from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "ru":
+ from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.ru.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'de':
- from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "de":
+ from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.de.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'fr':
- from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "fr":
+ from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.fr.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'vi':
- from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "vi":
+ from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.vi.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'ko':
- from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "ko":
+ from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'zh':
- from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "zh":
+ from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.zh.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
- elif lang == 'tl':
- from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import ClassifyFst
+ elif lang == "tl":
+ from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import (
+ ClassifyFst,
+ )
from fun_text_processing.inverse_text_normalization.tl.verbalizers.verbalize_final import (
VerbalizeFinalFst,
)
@@ -129,51 +158,62 @@
def str2bool(s, default=False):
s = s.lower()
- if s == 'true':
+ if s == "true":
return True
- elif s == 'false':
+ elif s == "false":
return False
else:
return default
+
def parse_args():
parser = ArgumentParser()
input = parser.add_mutually_exclusive_group()
input.add_argument("--text", dest="input_string", help="input string", type=str)
input.add_argument("--input_file", dest="input_file", help="input file path", type=str)
- parser.add_argument('--output_file', dest="output_file", help="output file path", type=str)
+ parser.add_argument("--output_file", dest="output_file", help="output file path", type=str)
parser.add_argument(
- "--language", help="language", choices=['en', 'id', 'ja', 'de', 'es', 'pt', 'ru', 'fr', 'vi', 'ko', 'zh', 'tl'], default="en", type=str
+ "--language",
+ help="language",
+ choices=["en", "id", "ja", "de", "es", "pt", "ru", "fr", "vi", "ko", "zh", "tl"],
+ default="en",
+ type=str,
)
- parser.add_argument("--verbose", help="print info for debugging", action='store_true')
- parser.add_argument("--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true")
+ parser.add_argument("--verbose", help="print info for debugging", action="store_true")
+ parser.add_argument(
+ "--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true"
+ )
parser.add_argument(
"--cache_dir",
help="path to a dir with .far grammar file. Set to None to avoid using cache",
default=None,
type=str,
)
- parser.add_argument('--enable_standalone_number', type=str,
- default='True',
- help='enable standalone number')
- parser.add_argument('--enable_0_to_9', type=str,
- default='True',
- help='enable convert number 0 to 9')
+ parser.add_argument(
+ "--enable_standalone_number", type=str, default="True", help="enable standalone number"
+ )
+ parser.add_argument(
+ "--enable_0_to_9", type=str, default="True", help="enable convert number 0 to 9"
+ )
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
start_time = perf_counter()
- if args.language == 'ja':
- inverse_normalizer = InverseNormalizer(lang=args.language, cache_dir=args.cache_dir, overwrite_cache=args.overwrite_cache,
- enable_standalone_number=str2bool(args.enable_standalone_number),
- enable_0_to_9=str2bool(args.enable_0_to_9))
+ if args.language == "ja":
+ inverse_normalizer = InverseNormalizer(
+ lang=args.language,
+ cache_dir=args.cache_dir,
+ overwrite_cache=args.overwrite_cache,
+ enable_standalone_number=str2bool(args.enable_standalone_number),
+ enable_0_to_9=str2bool(args.enable_0_to_9),
+ )
else:
inverse_normalizer = InverseNormalizer(
lang=args.language, cache_dir=args.cache_dir, overwrite_cache=args.overwrite_cache
)
- print(f'Time to generate graph: {round(perf_counter() - start_time, 2)} sec')
+ print(f"Time to generate graph: {round(perf_counter() - start_time, 2)} sec")
if args.input_string:
print(inverse_normalizer.inverse_normalize(args.input_string, verbose=args.verbose))
--
Gitblit v1.9.1