| | |
| | | if (text.size() > 1) { |
| | | score = std::stof(text[1]); |
| | | } |
| | | Utf8ToCharset(text[0], split_str); |
| | | SplitChiEngCharacters(text[0], split_str); |
| | | for (auto &str : split_str) { |
| | | split_id.push_back(phn_set_.String2Id(str)); |
| | | if (!phn_set_.Find(str)) { |
| | | is_oov = true; |
| | | break; |
| | | std::vector<string> lex_vec; |
| | | std::string lex_str = vocab_.Word2Lex(str); |
| | | SplitStringToVector(lex_str, " ", true, &lex_vec); |
| | | for (auto &token : lex_vec) { |
| | | split_id.push_back(phn_set_.String2Id(token)); |
| | | if (!phn_set_.Find(token)) { |
| | | is_oov = true; |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | if (!is_oov) { |
| | |
| | | std::vector<std::string> split_str; |
| | | std::vector<int> split_id; |
| | | score = kv.second; |
| | | Utf8ToCharset(kv.first, split_str); |
| | | SplitChiEngCharacters(kv.first, split_str); |
| | | for (auto &str : split_str) { |
| | | split_id.push_back(phn_set_.String2Id(str)); |
| | | if (!phn_set_.Find(str)) { |
| | | is_oov = true; |
| | | break; |
| | | std::vector<string> lex_vec; |
| | | std::string lex_str = vocab_.Word2Lex(str); |
| | | SplitStringToVector(lex_str, " ", true, &lex_vec); |
| | | for (auto &token : lex_vec) { |
| | | split_id.push_back(phn_set_.String2Id(token)); |
| | | if (!phn_set_.Find(token)) { |
| | | is_oov = true; |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | if (!is_oov) { |