| New file |
| | |
| | | /** |
| | | * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | * MIT License (https://opensource.org/licenses/MIT) |
| | | */ |
| | | #include "precomp.h" |
| | | //#include "util.h" |
| | | //#include "seg_dict.h" |
| | | #include <glog/logging.h> |
| | | |
| | | #include <fstream> |
| | | #include <iostream> |
| | | #include <list> |
| | | #include <sstream> |
| | | #include <string> |
| | | |
| | | using namespace std; |
| | | |
| | | namespace funasr { |
| | | SegDict::SegDict(const char *filename) |
| | | { |
| | | ifstream in(filename); |
| | | if (!in) { |
| | | LOG(ERROR) << filename << " open failed !!"; |
| | | return; |
| | | } |
| | | string textline; |
| | | while (getline(in, textline)) { |
| | | std::vector<string> line_item = split(textline, '\t'); |
| | | //std::cout << textline << std::endl; |
| | | if (line_item.size() > 1) { |
| | | std::string word = line_item[0]; |
| | | std::string segs = line_item[1]; |
| | | std::vector<string> segs_vec = split(segs, ' '); |
| | | seg_dict[word] = segs_vec; |
| | | } |
| | | } |
| | | LOG(INFO) << "load seg dict successfully"; |
| | | } |
| | | std::vector<std::string> SegDict::GetTokensByWord(const std::string &word) { |
| | | if (seg_dict.count(word)) |
| | | return seg_dict[word]; |
| | | else { |
| | | LOG(INFO)<< word <<" is OOV!"; |
| | | std::vector<string> vec; |
| | | return vec; |
| | | } |
| | | } |
| | | |
| | | SegDict::~SegDict() |
| | | { |
| | | } |
| | | |
| | | |
| | | } // namespace funasr |