# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

| | | import os |
| | | from time import perf_counter |
| | | from argparse import ArgumentParser |
| | | from fun_text_processing.text_normalization.en.graph_utils import generator_main |
| | | |
| | | |
| | | def parse_args(): |
| | | parser = ArgumentParser() |
| | | |
| | | |
| | | parser.add_argument( |
| | | "--language", help="language", choices=['de', 'en', 'es', 'fr', 'id', 'ja', 'ko', 'pt', 'ru', 'vi', 'zh'], default="en", type=str |
| | | "--language", |
| | | help="language", |
| | | choices=["de", "en", "es", "fr", "id", "ja", "ko", "pt", "ru", "vi", "zh"], |
| | | default="en", |
| | | type=str, |
| | | ) |
| | | parser.add_argument( |
| | | "--export_dir", |
| | |
| | | ) |
| | | return parser.parse_args() |
| | | |
| | | def get_grammars(lang: str='en'): |
| | | if lang=='de': |
| | | from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.de.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='en': |
| | | from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='es': |
| | | from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='fr': |
| | | from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.fr.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='id': |
| | | from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.id.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='ja': |
| | | from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='ko': |
| | | from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='pt': |
| | | from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.pt.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='ru': |
| | | from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.ru.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='vi': |
| | | from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.vi.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | elif lang=='zh': |
| | | from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.zh.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | else: |
| | | from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ClassifyFst |
| | | from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import VerbalizeFinalFst |
| | | |
| | | def get_grammars(lang: str = "en"): |
| | | if lang == "de": |
| | | from fun_text_processing.inverse_text_normalization.de.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.de.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "en": |
| | | from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "es": |
| | | from fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "fr": |
| | | from fun_text_processing.inverse_text_normalization.fr.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.fr.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "id": |
| | | from fun_text_processing.inverse_text_normalization.id.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.id.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "ja": |
| | | from fun_text_processing.inverse_text_normalization.ja.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.ja.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "ko": |
| | | from fun_text_processing.inverse_text_normalization.ko.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.ko.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "pt": |
| | | from fun_text_processing.inverse_text_normalization.pt.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.pt.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "ru": |
| | | from fun_text_processing.inverse_text_normalization.ru.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.ru.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "vi": |
| | | from fun_text_processing.inverse_text_normalization.vi.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.vi.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | elif lang == "zh": |
| | | from fun_text_processing.inverse_text_normalization.zh.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.zh.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | else: |
| | | from fun_text_processing.inverse_text_normalization.en.taggers.tokenize_and_classify import ( |
| | | ClassifyFst, |
| | | ) |
| | | from fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize_final import ( |
| | | VerbalizeFinalFst, |
| | | ) |
| | | |
| | | return ClassifyFst().fst, VerbalizeFinalFst().fst |
| | | return ClassifyFst().fst, VerbalizeFinalFst().fst |
| | | |
| | | |
| | | if __name__ == "__main__": |
| | | args = parse_args() |
| | |
| | | tagger_fst, verbalizer_fst = get_grammars(args.language) |
| | | generator_main(tagger_far_file, {"tokenize_and_classify": tagger_fst}) |
| | | generator_main(verbalizer_far_file, {"verbalize": verbalizer_fst}) |
| | | print(f'Time to generate graph: {round(perf_counter() - start_time, 2)} sec') |
| | | |
| | | print(f"Time to generate graph: {round(perf_counter() - start_time, 2)} sec") |