From 810046e3df4910c8f5c1a90e4b53aca45b3397e8 Mon Sep 17 00:00:00 2001
From: wuhongsheng <664116298@qq.com>
Date: 星期一, 01 七月 2024 10:42:58 +0800
Subject: [PATCH] 优化merge segments 参数,解决新闻联播男女主持人“晚上好”合并一个speakid问题 (#1861)
---
fun_text_processing/inverse_text_normalization/fr/verbalizers/whitelist.py | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/fun_text_processing/inverse_text_normalization/fr/verbalizers/whitelist.py b/fun_text_processing/inverse_text_normalization/fr/verbalizers/whitelist.py
index c94eebf..889dc88 100644
--- a/fun_text_processing/inverse_text_normalization/fr/verbalizers/whitelist.py
+++ b/fun_text_processing/inverse_text_normalization/fr/verbalizers/whitelist.py
@@ -1,5 +1,3 @@
-
-
import pynini
from fun_text_processing.inverse_text_normalization.fr.graph_utils import (
DAMO_CHAR,
@@ -21,9 +19,9 @@
graph = (
pynutil.delete("name:")
+ delete_space
- + pynutil.delete("\"")
+ + pynutil.delete('"')
+ pynini.closure(DAMO_CHAR - " ", 1)
- + pynutil.delete("\"")
+ + pynutil.delete('"')
)
- graph = graph @ pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", DAMO_SIGMA)
+ graph = graph @ pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)
self.fst = graph.optimize()
--
Gitblit v1.9.1