| | |
| | | } |
| | | |
| | | bool TimestampIsPunctuation(U16CHAR_T &u16) { |
| | | // '&' (0x26), '\'' (0x27) and '-' (0x2D) appear in the dict, so they are not treated as punctuation |
| | | if (u16 == 0x26 || u16 == 0x27 || u16 == 0x2D){ |
| | | return false; |
| | | } |
| | | return (u16 >= 0x21 && u16 <= 0x2F) // 标准ASCII标点 |
| | | || (u16 >= 0x3A && u16 <= 0x40) // 标准ASCII标点 |
| | | || (u16 >= 0x5B && u16 <= 0x60) // 标准ASCII标点 |
| | |
| | | } |
| | | } |
| | | |
| | | std::string VectorToString(const std::vector<std::vector<int>>& vec) { |
| | | std::string VectorToString(const std::vector<std::vector<int>>& vec, bool out_empty) { |
| | | if(vec.size() == 0){ |
| | | return ""; |
| | | if(out_empty){ |
| | | return ""; |
| | | }else{ |
| | | return "[]"; |
| | | } |
| | | } |
| | | std::ostringstream out; |
| | | out << "["; |
| | |
| | | } |
| | | } |
| | | // format |
| | | ts_sent += "{'text':'" + text_seg + "',"; |
| | | ts_sent += "'start':'" + to_string(start) + "',"; |
| | | ts_sent += "'end':'" + to_string(end) + "',"; |
| | | ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}"; |
| | | ts_sent += "{\"text_seg\":\"" + text_seg + "\","; |
| | | ts_sent += "\"punc\":\"" + characters[idx_str] + "\","; |
| | | ts_sent += "\"start\":" + to_string(start) + ","; |
| | | ts_sent += "\"end\":" + to_string(end) + ","; |
| | | ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}"; |
| | | |
| | | if (idx_str == characters.size()-1){ |
| | | ts_sentences += ts_sent; |
| | | } else{ |
| | | ts_sentences += ts_sent + ","; |
| | | } |
| | | |
| | | // clear |
| | | idx_str++; |
| | | text_seg = ""; |
| | | ts_sent = ""; |
| | | start = 0; |
| | |
| | | text_seg += " " + characters[idx_str]; |
| | | } |
| | | ts_seg.push_back(timestamps[idx_ts]); |
| | | idx_str++; |
| | | idx_ts++; |
| | | } |
| | | idx_str++; |
| | | } |
| | | // for results without punctuation |
| | | if(ts_seg.size() >0){ |
| | |
| | | end = ts_seg[ts_seg.size()-1][1]; |
| | | } |
| | | // format |
| | | ts_sent += "{'text':'" + text_seg + "',"; |
| | | ts_sent += "'start':'" + to_string(start) + "',"; |
| | | ts_sent += "'end':'" + to_string(end) + "',"; |
| | | ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}"; |
| | | ts_sent += "{\"text_seg\":\"" + text_seg + "\","; |
| | | ts_sent += "\"punc\":\"\","; |
| | | ts_sent += "\"start\":" + to_string(start) + ","; |
| | | ts_sent += "\"end\":" + to_string(end) + ","; |
| | | ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}"; |
| | | ts_sentences += ts_sent; |
| | | } |
| | | |