王梦迪
2025-05-20 fe588bc508c0076bb007d6ed36c18ac8ecb341ac
fun_text_processing/inverse_text_normalization/inverse_normalize.py
@@ -21,73 +21,102 @@
        overwrite_cache: set to True to overwrite .far files
    """
    def __init__(self, lang: str = 'en', cache_dir: str = None, overwrite_cache: bool = False,
                 enable_standalone_number: bool = True,
                 enable_0_to_9: bool = True):
    def __init__(
        self,
        lang: str = "en",
        cache_dir: str = None,
        overwrite_cache: bool = False,
        enable_standalone_number: bool = True,
        enable_0_to_9: bool = True,
    ):
        if lang == 'en':
            from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst
        if lang == "en":
            from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'id':
            from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "id":
            from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.id.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'ja':
            from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "ja":
            from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'es':
            from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "es":
            from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'pt':
            from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "pt":
            from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.pt.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'ru':
            from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "ru":
            from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.ru.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'de':
            from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "de":
            from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.de.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'fr':
            from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "fr":
            from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.fr.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'vi':
            from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "vi":
            from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.vi.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'ko':
            from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "ko":
            from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'zh':
            from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "zh":
            from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.zh.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
        elif lang == 'tl':
            from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import ClassifyFst
        elif lang == "tl":
            from fun_text_processing.inverse_text_normalization.tl.taggers.tokenize_and_classify import (
                ClassifyFst,
            )
            from fun_text_processing.inverse_text_normalization.tl.verbalizers.verbalize_final import (
                VerbalizeFinalFst,
            )
@@ -129,51 +158,62 @@
def str2bool(s, default=False):
    """Interpret a command-line style value as a boolean.

    Args:
        s: Value to interpret. The words ``"true"`` and ``"false"`` are
            matched case-insensitively. A ``bool`` is returned unchanged.
        default: Value returned when ``s`` is not one of the two
            recognised words (or is not a string at all).

    Returns:
        ``True`` or ``False`` for a recognised value, otherwise ``default``.
    """
    # Already a boolean: nothing to parse.
    if isinstance(s, bool):
        return s
    # Anything else that is not text cannot be one of the two keywords.
    if not isinstance(s, str):
        return default

    lowered = s.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    return default
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the attributes ``input_string``,
        ``input_file``, ``output_file``, ``language``, ``verbose``,
        ``overwrite_cache``, ``cache_dir``, ``enable_standalone_number`` and
        ``enable_0_to_9``.
    """
    parser = ArgumentParser()

    # --text and --input_file are alternative input sources; only one may be given.
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument("--text", dest="input_string", help="input string", type=str)
    input_group.add_argument("--input_file", dest="input_file", help="input file path", type=str)

    parser.add_argument("--output_file", dest="output_file", help="output file path", type=str)
    parser.add_argument(
        "--language",
        help="language",
        choices=["en", "id", "ja", "de", "es", "pt", "ru", "fr", "vi", "ko", "zh", "tl"],
        default="en",
        type=str,
    )
    parser.add_argument("--verbose", help="print info for debugging", action="store_true")
    parser.add_argument(
        "--overwrite_cache", help="set to True to re-create .far grammar files", action="store_true"
    )
    parser.add_argument(
        "--cache_dir",
        help="path to a dir with .far grammar file. Set to None to avoid using cache",
        default=None,
        type=str,
    )
    # The two switches below are kept as strings ("True"/"False"); the caller
    # is responsible for converting them to booleans.
    parser.add_argument(
        "--enable_standalone_number", type=str, default="True", help="enable standalone number"
    )
    parser.add_argument(
        "--enable_0_to_9", type=str, default="True", help="enable convert number 0 to 9"
    )
    return parser.parse_args(argv)
if __name__ == "__main__":
    # Command-line entry point: build the inverse normalizer once, report how
    # long graph construction took, then process the requested input.
    args = parse_args()
    start_time = perf_counter()

    normalizer_kwargs = {
        "lang": args.language,
        "cache_dir": args.cache_dir,
        "overwrite_cache": args.overwrite_cache,
    }
    if args.language == "ja":
        # Only the Japanese run forwards the number-handling switches; every
        # other language is constructed without them.
        normalizer_kwargs["enable_standalone_number"] = str2bool(args.enable_standalone_number)
        normalizer_kwargs["enable_0_to_9"] = str2bool(args.enable_0_to_9)
    inverse_normalizer = InverseNormalizer(**normalizer_kwargs)

    print(f"Time to generate graph: {round(perf_counter() - start_time, 2)} sec")

    if args.input_string:
        print(inverse_normalizer.inverse_normalize(args.input_string, verbose=args.verbose))