From b635c062f1550be59047168fcb48a39542913a57 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 20 Dec 2023 17:47:54 +0800
Subject: [PATCH] update TimestampSentence

---
 runtime/docs/websocket_protocol.md    |    4 ++--
 runtime/onnxruntime/src/util.cpp      |    6 ++++--
 runtime/docs/websocket_protocol_zh.md |    4 ++--
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/runtime/docs/websocket_protocol.md b/runtime/docs/websocket_protocol.md
index 867a182..588339e 100644
--- a/runtime/docs/websocket_protocol.md
+++ b/runtime/docs/websocket_protocol.md
@@ -45,7 +45,7 @@
 `text`: the text output of speech recognition
 `is_final`: indicating the end of recognition
 `timestamp`：If AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]"
-`stamp_sents`：If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
+`stamp_sents`：If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
 ```
 
 ## Real-time Speech Recognition
@@ -94,5 +94,5 @@
 `text`: the text output of speech recognition
 `is_final`: indicating the end of recognition
 `timestamp`：If AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]"
-`stamp_sents`：If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
+`stamp_sents`：If AM is a timestamp model, it will return this field, indicating the stamp_sents, in the format of "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
 ```
diff --git a/runtime/docs/websocket_protocol_zh.md b/runtime/docs/websocket_protocol_zh.md
index 98b5c20..e00a989 100644
--- a/runtime/docs/websocket_protocol_zh.md
+++ b/runtime/docs/websocket_protocol_zh.md
@@ -46,7 +46,7 @@
 `text`：表示语音识别输出文本
 `is_final`：表示识别结束
 `timestamp`：如果AM为时间戳模型，会返回此字段，表示时间戳，格式为 "[[100,200], [200,500]]"(ms)
-`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
+`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
 ```
 
 ## 实时语音识别
@@ -96,5 +96,5 @@
 `text`：表示语音识别输出文本
 `is_final`：表示识别结束
 `timestamp`：如果AM为时间戳模型，会返回此字段，表示时间戳，格式为 "[[100,200], [200,500]]"(ms)
-`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 "[{'text':'正 是 因 为','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
+`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 "[{'text_seg':'正 是 因 为','punc':',','start':'430','end':'1130','ts_list':[[430,670],[670,810],[810,1030],[1030,1130]]}]"
 ```
diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp
index 661a50d..0fbec4f 100644
--- a/runtime/onnxruntime/src/util.cpp
+++ b/runtime/onnxruntime/src/util.cpp
@@ -584,7 +584,8 @@
                 }
             }
             // format
-            ts_sent += "{'text':'" + text_seg + "',";
+            ts_sent += "{'text_seg':'" + text_seg + "',";
+            ts_sent += "'punc':'" + characters[idx_str] + "',";
             ts_sent += "'start':'" + to_string(start) + "',";
             ts_sent += "'end':'" + to_string(end) + "',";
             ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}";
@@ -620,7 +621,8 @@
             end = ts_seg[ts_seg.size()-1][1];
         }
         // format
-        ts_sent += "{'text':'" + text_seg + "',";
+        ts_sent += "{'text_seg':'" + text_seg + "',";
+        ts_sent += "'punc':'',";
         ts_sent += "'start':'" + to_string(start) + "',";
         ts_sent += "'end':'" + to_string(end) + "',";
         ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}";

--
Gitblit v1.9.1