shixian.shi
2023-05-04 1988fe85f6d4e2d2f809e705e13d69d0b57bd0fc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
 
 
import pynini
from fun_text_processing.text_normalization.zh.graph_utils import FUN_NOT_QUOTE, GraphFst, delete_space
from fun_text_processing.text_normalization.zh.utils import UNIT_1e01, get_abs_path
from pynini.lib import pynutil
 
 
class Time(GraphFst):
    """
    Verbalizer for tagged time tokens.

    Accepts the serialized fields ``hours``, ``minutes`` and (optionally)
    ``seconds``, optionally preceded by a ``suffix`` field, e.g.
        tokens { time { hours: "1" minutes: "02" seconds: "36" } } -> 一点零二分三十六秒
        tokens { time { suffix: "..." hours: "1" minutes: "02" seconds: "36" } }

    The value of ``suffix`` is not rewritten by this grammar: whatever sits
    between its quotes is copied in front of the spoken time, so it is
    presumably expected to arrive already in spoken form (e.g. 上午) --
    TODO confirm against the tagger.

    Args:
        deterministic: forwarded unchanged to ``GraphFst``.
        lm: accepted for interface compatibility; not used in this class.
    """

    def __init__(self, deterministic: bool = True, lm: bool = False):
        super().__init__(name="time", kind="verbalize", deterministic=deterministic)

        # Lookup tables shipped with the package (contents are not visible here).
        digit = pynini.string_file(get_abs_path("data/number/digit.tsv"))
        tens_digit = pynini.string_file(get_abs_path("data/number/digit_teen.tsv"))
        leading_zero = pynini.string_file(get_abs_path("data/number/zero.tsv"))

        # A "0" that produces no output at all.
        silent_zero = pynini.cross("0", "")
        ones_digit = digit | silent_zero

        # "00" read aloud as a single 零 versus "00" that is dropped entirely.
        double_zero_spoken = pynini.cross("00", "零")
        double_zero_silent = silent_zero + silent_zero

        # Two-character values: either <tens><UNIT_1e01><ones, trailing 0 silent>
        # or <zero-table entry><digit>.
        two_digit = (tens_digit + pynutil.insert(UNIT_1e01) + ones_digit) | (leading_zero + digit)

        hour_value = two_digit | double_zero_spoken | digit
        minute_or_second_value = two_digit | double_zero_spoken

        closing_quote = pynutil.delete('"')

        def quoted_field(opening, value, unit=None):
            """Strip ``opening`` and the closing quote; append ``unit`` after the value if given."""
            field = pynutil.delete(opening) + value
            if unit is not None:
                field = field + pynutil.insert(unit)
            return field + closing_quote

        # Every layout starts with the same hour field followed by 点.
        hour_field = quoted_field('hours: "', hour_value, "点")

        # 6:25 -- minutes are spoken and followed by 分.
        hours_minutes = hour_field + delete_space + quoted_field('minutes: "', two_digit, "分")

        # 23:00 -- minutes "00" are dropped, so only the hour is spoken.
        hours_on_the_hour = hour_field + delete_space + quoted_field('minutes: "', double_zero_silent)

        # 9:12:52 -- minutes and seconds are both spoken; "00" becomes 零 here.
        hours_minutes_seconds = (
            hour_field
            + delete_space
            + quoted_field('minutes: "', minute_or_second_value, "分")
            + delete_space
            + quoted_field('seconds: "', minute_or_second_value, "秒")
        )

        time_body = hours_minutes | hours_minutes_seconds | hours_on_the_hour

        # Optional leading suffix field: its quoted text is passed through as-is.
        with_suffix = (
            pynutil.delete('suffix: "')
            + pynini.closure(FUN_NOT_QUOTE)
            + closing_quote
            + delete_space
            + time_body
        )

        self.fst = self.delete_tokens(time_body | with_suffix).optimize()