majic31
2024-12-24 23e7ddebccd3b05cf7ef89809bcfe565ad6dfa1f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pynini
from fun_text_processing.text_normalization.zh.graph_utils import GraphFst, insert_space
from fun_text_processing.text_normalization.zh.utils import get_abs_path
from pynini.lib import pynutil
 
 
class Time(GraphFst):
    """
    Classifier for colon-separated clock times, with an optional trailing suffix.

    Illustrative shape of the produced tokens (the concrete field values are
    whatever the mappings under data/time/*.tsv emit):
        1:02    -> tokens { time { hours: "1" minutes: "02" } }
        1:02:36 -> tokens { time { hours: "1" minutes: "02" seconds: "36" } }
        1:02 am -> tokens { time { hours: "1" minutes: "02" suffix: "am" } }

    Args:
        deterministic: forwarded unchanged to GraphFst.
        lm: accepted for signature compatibility; not referenced by this class.
    """

    def __init__(self, deterministic: bool = True, lm: bool = False):
        super().__init__(name="time", kind="classify", deterministic=deterministic)

        # Mapping tables shipped with the package.
        hour = pynini.string_file(get_abs_path("data/time/hour.tsv"))
        tens = pynini.string_file(get_abs_path("data/time/tens.tsv"))
        nonzero_digit = pynini.string_file(get_abs_path("data/time/digit.tsv"))
        zero = pynini.string_file(get_abs_path("data/time/zero.tsv"))
        suffix = pynini.string_file(get_abs_path("data/time/suffix.tsv"))

        any_digit = nonzero_digit | zero
        two_digits = tens + any_digit

        # Minutes always take two symbols; seconds may also be a single digit.
        minute = two_digits
        second = two_digits | any_digit

        # The ':' of the input is rewritten to the blank that separates two
        # adjacent fields in the tagged output.
        colon_to_space = pynini.cross(":", " ")

        closing_quote = pynutil.insert('"')
        hours_field = pynutil.insert('hours: "') + hour + closing_quote
        minutes_field = pynutil.insert('minutes: "') + minute + closing_quote
        seconds_field = pynutil.insert('seconds: "') + second + closing_quote
        suffix_field = pynutil.insert('suffix: "') + suffix + closing_quote

        # 5:05, 14:30
        hour_minute = hours_field + colon_to_space + minutes_field

        # 1:30:15 -- the hour:minute pattern followed by a seconds field.
        hour_minute_second = hour_minute + colon_to_space + seconds_field

        bare_time = hour_minute | hour_minute_second
        time_with_suffix = bare_time + insert_space + suffix_field

        self.fst = self.add_tokens(bare_time | time_with_suffix).optimize()