| | |
| | | } |
| | | |
| | | bool TimestampIsPunctuation(U16CHAR_T &u16) { |
| | | // (& ' -) in the dict |
| | | if (u16 == 0x26 || u16 == 0x27 || u16 == 0x2D){ |
| | | return false; |
| | | } |
| | | return (u16 >= 0x21 && u16 <= 0x2F) // 标准ASCII标点 |
| | | || (u16 >= 0x3A && u16 <= 0x40) // 标准ASCII标点 |
| | | || (u16 >= 0x5B && u16 <= 0x60) // 标准ASCII标点 |
| | |
| | | } |
| | | } |
| | | |
| | | std::string VectorToString(const std::vector<std::vector<int>>& vec) { |
| | | std::string VectorToString(const std::vector<std::vector<int>>& vec, bool out_empty) { |
| | | if(vec.size() == 0){ |
| | | return ""; |
| | | if(out_empty){ |
| | | return ""; |
| | | }else{ |
| | | return "[]"; |
| | | } |
| | | } |
| | | std::ostringstream out; |
| | | out << "["; |
| | |
| | | } |
| | | } |
| | | // format |
| | | ts_sent += "{'text':'" + text_seg + "',"; |
| | | ts_sent += "'start':'" + to_string(start) + "',"; |
| | | ts_sent += "'end':'" + to_string(end) + "',"; |
| | | ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}"; |
| | | ts_sent += "{\"text_seg\":\"" + text_seg + "\","; |
| | | ts_sent += "\"punc\":\"" + characters[idx_str] + "\","; |
| | | ts_sent += "\"start\":" + to_string(start) + ","; |
| | | ts_sent += "\"end\":" + to_string(end) + ","; |
| | | ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}"; |
| | | |
| | | if (idx_str == characters.size()-1){ |
| | | ts_sentences += ts_sent; |
| | | } else{ |
| | | ts_sentences += ts_sent + ","; |
| | | } |
| | | |
| | | // clear |
| | | idx_str++; |
| | | text_seg = ""; |
| | | ts_sent = ""; |
| | | start = 0; |
| | |
| | | text_seg += " " + characters[idx_str]; |
| | | } |
| | | ts_seg.push_back(timestamps[idx_ts]); |
| | | idx_str++; |
| | | idx_ts++; |
| | | } |
| | | idx_str++; |
| | | } |
| | | // for none punc results |
| | | if(ts_seg.size() >0){ |
| | |
| | | end = ts_seg[ts_seg.size()-1][1]; |
| | | } |
| | | // format |
| | | ts_sent += "{'text':'" + text_seg + "',"; |
| | | ts_sent += "'start':'" + to_string(start) + "',"; |
| | | ts_sent += "'end':'" + to_string(end) + "',"; |
| | | ts_sent += "'ts_list':" + VectorToString(ts_seg) + "}"; |
| | | ts_sent += "{\"text_seg\":\"" + text_seg + "\","; |
| | | ts_sent += "\"punc\":\"\","; |
| | | ts_sent += "\"start\":" + to_string(start) + ","; |
| | | ts_sent += "\"end\":" + to_string(end) + ","; |
| | | ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}"; |
| | | ts_sentences += ts_sent; |
| | | } |
| | | |
| | |
| | | elems.push_back(item); |
| | | } |
| | | return elems; |
| | | } |
| | | |
// Splits `s` on every occurrence of the (possibly multi-character) `delimiter`.
//
// Parameters:
//   s         - the string to split.
//   delimiter - the separator sequence; matched left to right, non-overlapping.
//
// Returns the pieces in order. Empty pieces are preserved, so adjacent
// delimiters, a leading delimiter, or a trailing delimiter each yield an
// empty token, and an empty `s` yields a single empty token.
//
// An empty `delimiter` cannot split anything: the whole input is returned as
// the only token. (Without this guard, find("") matches at `start` forever
// and the loop never terminates.)
std::vector<std::string> SplitStr(const std::string &s, std::string delimiter) {
    std::vector<std::string> tokens;

    if (delimiter.empty()) {
        tokens.push_back(s);
        return tokens;
    }

    size_t start = 0;
    size_t end = s.find(delimiter);

    while (end != std::string::npos) {
        tokens.push_back(s.substr(start, end - start));
        // Resume the search just past the delimiter that was consumed.
        start = end + delimiter.length();
        end = s.find(delimiter, start);
    }
    // Whatever follows the last delimiter (possibly empty) is the final token.
    tokens.push_back(s.substr(start));

    return tokens;
}
| | | |
| | | template<typename T> |
| | |
| | | sum -=(1.0 - 1e-4); |
| | | } |
| | | } |
| | | // fix case: sum > 1 |
| | | int cif_idx = cif_peak.size()-1; |
| | | while(sum>=1.0 - 1e-4 && cif_idx >= 0 ){ |
| | | if(cif_peak[cif_idx] < 1.0 - 1e-4){ |
| | | cif_peak[cif_idx] = sum; |
| | | sum -=(1.0 - 1e-4); |
| | | } |
| | | cif_idx--; |
| | | } |
| | | |
| | | fire_place.clear(); |
| | | for (int i = 0; i < num_frames; i++) { |