From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: 星期五, 13 三月 2026 17:41:41 +0800
Subject: [PATCH] 提交
---
runtime/onnxruntime/src/util.cpp | 58 +++++++++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp
index ac793f5..50c9c82 100644
--- a/runtime/onnxruntime/src/util.cpp
+++ b/runtime/onnxruntime/src/util.cpp
@@ -305,6 +305,10 @@
}
bool TimestampIsPunctuation(U16CHAR_T &u16) {
+ // (& ' -) in the dict
+ if (u16 == 0x26 || u16 == 0x27 || u16 == 0x2D){
+ return false;
+ }
return (u16 >= 0x21 && u16 <= 0x2F) // 标准ASCII标点
|| (u16 >= 0x3A && u16 <= 0x40) // 标准ASCII标点
|| (u16 >= 0x5B && u16 <= 0x60) // 标准ASCII标点
@@ -361,9 +365,13 @@
}
}
-std::string VectorToString(const std::vector<std::vector<int>>& vec) {
+std::string VectorToString(const std::vector<std::vector<int>>& vec, bool out_empty) {
if(vec.size() == 0){
- return "";
+ if(out_empty){
+ return "";
+ }else{
+ return "[]";
+ }
}
std::ostringstream out;
out << "[";
@@ -580,19 +588,18 @@
}
}
// format
- ts_sent += "{'text':'" + text_seg + "',";
- ts_sent += "'start':'" + to_string(start) + "',";
- ts_sent += "'end':'" + to_string(end) + "',";
- ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}";
+ ts_sent += "{\"text_seg\":\"" + text_seg + "\",";
+ ts_sent += "\"punc\":\"" + characters[idx_str] + "\",";
+ ts_sent += "\"start\":" + to_string(start) + ",";
+ ts_sent += "\"end\":" + to_string(end) + ",";
+ ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}";
if (idx_str == characters.size()-1){
ts_sentences += ts_sent;
} else{
ts_sentences += ts_sent + ",";
}
-
// clear
- idx_str++;
text_seg = "";
ts_sent = "";
start = 0;
@@ -605,9 +612,9 @@
text_seg += " " + characters[idx_str];
}
ts_seg.push_back(timestamps[idx_ts]);
- idx_str++;
idx_ts++;
}
+ idx_str++;
}
// for none punc results
if(ts_seg.size() >0){
@@ -618,10 +625,11 @@
end = ts_seg[ts_seg.size()-1][1];
}
// format
- ts_sent += "{'text':'" + text_seg + "',";
- ts_sent += "'start':'" + to_string(start) + "',";
- ts_sent += "'end':'" + to_string(end) + "',";
- ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}";
+ ts_sent += "{\"text_seg\":\"" + text_seg + "\",";
+ ts_sent += "\"punc\":\"\",";
+ ts_sent += "\"start\":" + to_string(start) + ",";
+ ts_sent += "\"end\":" + to_string(end) + ",";
+ ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}";
ts_sentences += ts_sent;
}
@@ -636,6 +644,21 @@
elems.push_back(item);
}
return elems;
+}
+
+// Split `s` on each occurrence of `delimiter`; empty fields are kept, so the result is never empty.
+// An empty `delimiter` returns {s}: find("") matches at every offset and would never advance.
+std::vector<std::string> SplitStr(const std::string &s, std::string delimiter) {
+    if (delimiter.empty()) return {s};
+    std::vector<std::string> tokens;
+    size_t start = 0;
+    for (size_t end = s.find(delimiter); end != std::string::npos; end = s.find(delimiter, start)) {
+        tokens.push_back(s.substr(start, end - start));
+        start = end + delimiter.length();  // resume the search just past this match
+    }
+    // Tail after the last delimiter (the whole string when there was no match).
+    tokens.push_back(s.substr(start));
+    return tokens;
}
template<typename T>
@@ -862,6 +885,15 @@
sum -=(1.0 - 1e-4);
}
}
+ // fix case: sum > 1
+ int cif_idx = cif_peak.size()-1;
+ while(sum>=1.0 - 1e-4 && cif_idx >= 0 ){
+ if(cif_peak[cif_idx] < 1.0 - 1e-4){
+ cif_peak[cif_idx] = sum;
+ sum -=(1.0 - 1e-4);
+ }
+ cif_idx--;
+ }
fire_place.clear();
for (int i = 0; i < num_frames; i++) {
--
Gitblit v1.9.1