From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Reformat en electronic verbalizer (wrap long lines, normalize quote style)
---
fun_text_processing/text_normalization/en/verbalizers/electronic.py | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/fun_text_processing/text_normalization/en/verbalizers/electronic.py b/fun_text_processing/text_normalization/en/verbalizers/electronic.py
index 3a30dc2..142a9dd 100644
--- a/fun_text_processing/text_normalization/en/verbalizers/electronic.py
+++ b/fun_text_processing/text_normalization/en/verbalizers/electronic.py
@@ -1,5 +1,3 @@
-
-
import pynini
from fun_text_processing.text_normalization.en.graph_utils import (
DAMO_NOT_QUOTE,
@@ -28,7 +26,9 @@
def __init__(self, deterministic: bool = True):
super().__init__(name="electronic", kind="verbalize", deterministic=deterministic)
- graph_digit_no_zero = pynini.invert(pynini.string_file(get_abs_path("data/number/digit.tsv"))).optimize()
+ graph_digit_no_zero = pynini.invert(
+ pynini.string_file(get_abs_path("data/number/digit.tsv"))
+ ).optimize()
graph_zero = pynini.cross("0", "zero")
if not deterministic:
@@ -38,7 +38,10 @@
graph_symbols = pynini.string_file(get_abs_path("data/electronic/symbol.tsv")).optimize()
default_chars_symbols = pynini.cdrewrite(
- pynutil.insert(" ") + (graph_symbols | graph_digit) + pynutil.insert(" "), "", "", DAMO_SIGMA
+ pynutil.insert(" ") + (graph_symbols | graph_digit) + pynutil.insert(" "),
+ "",
+ "",
+ DAMO_SIGMA,
)
default_chars_symbols = pynini.compose(
pynini.closure(DAMO_NOT_SPACE), default_chars_symbols.optimize()
@@ -47,9 +50,9 @@
user_name = (
pynutil.delete("username:")
+ delete_space
- + pynutil.delete("\"")
+ + pynutil.delete('"')
+ default_chars_symbols
- + pynutil.delete("\"")
+ + pynutil.delete('"')
)
domain_common = pynini.string_file(get_abs_path("data/electronic/domain.tsv"))
@@ -58,22 +61,29 @@
default_chars_symbols
+ insert_space
+ plurals._priority_union(
- domain_common, pynutil.add_weight(pynini.cross(".", "dot"), weight=0.0001), DAMO_SIGMA
+ domain_common,
+ pynutil.add_weight(pynini.cross(".", "dot"), weight=0.0001),
+ DAMO_SIGMA,
)
+ pynini.closure(
- insert_space + (pynini.cdrewrite(TO_UPPER, "", "", DAMO_SIGMA) @ default_chars_symbols), 0, 1
+ insert_space
+ + (pynini.cdrewrite(TO_UPPER, "", "", DAMO_SIGMA) @ default_chars_symbols),
+ 0,
+ 1,
)
)
domain = (
pynutil.delete("domain:")
+ delete_space
- + pynutil.delete("\"")
+ + pynutil.delete('"')
+ domain
+ delete_space
- + pynutil.delete("\"")
+ + pynutil.delete('"')
).optimize()
- protocol = pynutil.delete("protocol: \"") + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete("\"")
+ protocol = (
+ pynutil.delete('protocol: "') + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete('"')
+ )
graph = (
pynini.closure(protocol + delete_space, 0, 1)
+ pynini.closure(user_name + delete_space + pynutil.insert(" at ") + delete_space, 0, 1)
--
Gitblit v1.9.1