kongdeqiang
2026-03-13 28ccfbfc51068a663a80764e14074df5edf2b5ba
fun_text_processing/text_normalization/de/verbalizers/electronic.py
@@ -1,17 +1,3 @@
# Copyright NeMo (https://github.com/NVIDIA/NeMo). All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pynini
from fun_text_processing.text_normalization.de.utils import get_abs_path
from fun_text_processing.text_normalization.en.graph_utils import (
@@ -41,7 +27,9 @@
        graph_digit_no_zero = pynini.invert(
            pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
        ).optimize() | pynini.cross("1", "eins")
        graph_zero = pynini.invert(pynini.string_file(get_abs_path("data/numbers/zero.tsv"))).optimize()
        graph_zero = pynini.invert(
            pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        ).optimize()
        graph_digit = graph_digit_no_zero | graph_zero
        graph_symbols = pynini.string_file(get_abs_path("data/electronic/symbols.tsv")).optimize()
        server_common = pynini.string_file(get_abs_path("data/electronic/server_name.tsv"))
@@ -54,18 +42,20 @@
        verbalize_characters = pynini.cdrewrite(graph_symbols | graph_digit, "", "", DAMO_SIGMA)
        user_name = pynutil.delete("username: \"") + add_space_after_char() + pynutil.delete("\"")
        user_name = pynutil.delete('username: "') + add_space_after_char() + pynutil.delete('"')
        user_name @= verbalize_characters
        convert_defaults = pynutil.add_weight(DAMO_NOT_QUOTE, weight=0.0001) | domain_common | server_common
        convert_defaults = (
            pynutil.add_weight(DAMO_NOT_QUOTE, weight=0.0001) | domain_common | server_common
        )
        domain = convert_defaults + pynini.closure(insert_space + convert_defaults)
        domain @= verbalize_characters
        domain = pynutil.delete("domain: \"") + domain + pynutil.delete("\"")
        domain = pynutil.delete('domain: "') + domain + pynutil.delete('"')
        protocol = (
            pynutil.delete("protocol: \"")
            pynutil.delete('protocol: "')
            + add_space_after_char() @ pynini.cdrewrite(graph_symbols, "", "", DAMO_SIGMA)
            + pynutil.delete("\"")
            + pynutil.delete('"')
        )
        self.graph = (pynini.closure(protocol + pynini.accep(" "), 0, 1) + domain) | (
            user_name + pynini.accep(" ") + pynutil.insert("at ") + domain