import pynini
from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, DAMO_SIGMA, GraphFst, insert_space
from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
from pynini.examples import plurals
from pynini.lib import pynutil


class FractionFst(GraphFst):
    """
    Verbalizer FST that turns a tagged fraction token into its spoken form,
        e.g. tokens { fraction { integer_part: "twenty three" numerator: "four" denominator: "five" } } ->
        twenty three and four fifths

    The denominators "one", "two" and "four" are special-cased to
    "over one", "half" and "quarter"; every other denominator is composed
    with the ordinal suffix graph taken from ``OrdinalFst``. When the
    numerator is anything other than "one" the denominator is pluralized
    ("half" -> "halves", otherwise a trailing "s"), and two end-of-string
    clean-ups are applied: "and one half" -> "and a half" and
    "over ones" -> "over one".

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple transductions are generated (used for
            audio-based normalization). In the non-deterministic case the
            denominator "four" may additionally be read through the ordinal
            suffix graph instead of only as "quarter".
        lm: when ``deterministic`` is False and ``lm`` is also False, the
            "and" inserted between the integer part and the fraction becomes
            optional; otherwise it is always inserted.
    """

    def __init__(self, deterministic: bool = True, lm: bool = False):
        super().__init__(name="fraction", kind="verbalize", deterministic=deterministic)
        ordinal_suffix = OrdinalFst().suffix

        # --- denominators -------------------------------------------------
        # Denominators that have a dedicated spoken form.
        over_one = pynini.cross("denominator: \"one\"", "over one")
        half = pynini.cross("denominator: \"two\"", "half")
        quarter = pynini.cross("denominator: \"four\"", "quarter")
        # Fallback: drop the field wrapper and pass the quoted words through
        # the ordinal suffix graph.
        as_ordinal = pynini.closure(DAMO_NOT_QUOTE) @ ordinal_suffix
        generic = pynutil.delete("denominator: \"") + as_ordinal + pynutil.delete("\"")

        # Priority order is one > half > quarter > generic; the chain is
        # built from the lowest-priority pair outwards.
        quarter_or_generic = plurals._priority_union(quarter, generic, DAMO_SIGMA)
        half_or_lower = plurals._priority_union(half, quarter_or_generic, DAMO_SIGMA)
        denominators = plurals._priority_union(over_one, half_or_lower, DAMO_SIGMA).optimize()

        if not deterministic:
            # Additionally allow "four" to go through the ordinal suffix graph.
            four_as_ordinal = pynini.accep("four") @ ordinal_suffix
            denominators |= pynutil.delete("denominator: \"") + four_as_ordinal + pynutil.delete("\"")

        # --- numerators ---------------------------------------------------
        # Numerator "one": the denominator stays singular.
        singular = (
            pynutil.delete("numerator: \"")
            + pynini.accep("one")
            + pynutil.delete("\" ")
            + insert_space
            + denominators
        )

        # Any other numerator: same shape, then pluralize the final word.
        anything_but_one = pynini.closure(DAMO_NOT_QUOTE) - pynini.accep("one")
        plural = (
            pynutil.delete("numerator: \"")
            + anything_but_one
            + pynutil.delete("\" ")
            + insert_space
            + denominators
        )
        pluralize_at_end = pynini.cdrewrite(
            plurals._priority_union(pynini.cross("half", "halves"), pynutil.insert("s"), DAMO_SIGMA),
            "",
            "[EOS]",
            DAMO_SIGMA,
        )
        plural @= pluralize_at_end

        # --- optional integer part ----------------------------------------
        and_word = pynutil.insert("and ")
        if not (deterministic or lm):
            # Non-deterministic, non-LM mode: "and" may be omitted.
            and_word = pynini.closure(and_word, 0, 1)

        whole = pynutil.delete("integer_part: \"") + pynini.closure(DAMO_NOT_QUOTE) + pynutil.delete("\" ")
        optional_whole = pynini.closure(whole + insert_space + and_word, 0, 1)

        # --- assemble and clean up ----------------------------------------
        graph = optional_whole + (singular | plural)
        # End-of-string fixes: "and one half" reads as "and a half", and the
        # pluralizer's "over ones" is restored to "over one".
        end_fixes = pynini.cross("and one half", "and a half") | pynini.cross("over ones", "over one")
        graph @= pynini.cdrewrite(end_fixes, "", "[EOS]", DAMO_SIGMA)

        self.graph = graph
        self.fst = self.delete_tokens(self.graph).optimize()