From 8b0fb74bded1f8a162e6c0e94c3522be6216ea03 Mon Sep 17 00:00:00 2001
From: chengligen <101448376+chengligen@users.noreply.github.com>
Date: Mon, 26 May 2025 14:11:33 +0800
Subject: [PATCH] feat: add 'words' key aligned with timestamps in sensevoice model output (#2531)

---
 fun_text_processing/text_normalization/token_parser.py |   20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fun_text_processing/text_normalization/token_parser.py b/fun_text_processing/text_normalization/token_parser.py
index 10b2ab3..cb5dfe1 100644
--- a/fun_text_processing/text_normalization/token_parser.py
+++ b/fun_text_processing/text_normalization/token_parser.py
@@ -1,5 +1,3 @@
-
-
 import string
 from collections import OrderedDict
 from typing import Dict, List, Union
@@ -22,7 +20,7 @@
 
         Args:
             text: text to be parsed
-        
+
         """
         self.text = text
         self.len_text = len(text)
@@ -76,9 +74,9 @@
         if self.char == ":":
             self.parse_char(":")
             self.parse_ws()
-            self.parse_char("\"")
+            self.parse_char('"')
             value_string = self.parse_string_value()
-            self.parse_char("\"")
+            self.parse_char('"')
             return value_string
         elif self.char == "{":
             d = OrderedDict()
@@ -95,11 +93,11 @@
 
     def parse_char(self, exp) -> bool:
         """
-        Parses character 
+        Parses character
 
         Args:
             exp: character to read in
-        
+
         Returns true if successful
         """
         assert self.char == exp
@@ -112,7 +110,7 @@
 
         Args:
             exp: characters to read in
-        
+
         Returns true if successful
         """
         ok = False
@@ -147,7 +145,7 @@
         # assert self.char not in string.whitespace and self.char != EOS
         assert self.char != EOS
         l = []
-        while self.char != "\"" or self.text[self.index + 1] != " ":
+        while self.char != '"' or self.text[self.index + 1] != " ":
             l.append(self.char)
             if not self.read():
                 raise ValueError()
@@ -169,8 +167,8 @@
 
     def read(self):
         """
-        Reads in next char. 
-        
+        Reads in next char.
+
         Returns true if not EOS
         """
         if self.index < self.len_text - 1:  # should be unique

--
Gitblit v1.9.1