| | |
| | | #include "vocab.h" |
| | | #include "yaml-cpp/yaml.h" |
| | | #include <yaml-cpp/yaml.h> |
| | | #include <glog/logging.h> |
| | | |
| | | #include <fstream> |
| | | #include <iostream> |
| | |
| | | |
| | | using namespace std; |
| | | |
| | | namespace funasr { |
| | | Vocab::Vocab(const char *filename) |
| | | { |
| | | ifstream in(filename); |
| | |
| | | YAML::Node config; |
| | | try{ |
| | | config = YAML::LoadFile(filename); |
| | | }catch(...){ |
| | | printf("error loading file, yaml file error or not exist.\n"); |
| | | }catch(exception const &e){ |
| | | LOG(INFO) << "Error loading file, yaml file error or not exist."; |
| | | exit(-1); |
| | | } |
| | | YAML::Node myList = config["token_list"]; |
| | |
| | | else { |
| | | // pre word is chinese |
| | | if (!is_pre_english) { |
| | | word[0] = word[0] - 32; |
| | | // word[0] = word[0] - 32; |
| | | words.push_back(word); |
| | | pre_english_len = word.size(); |
| | | |
| | | } |
| | | // pre word is english word |
| | | else { |
| | | // single letter turn to upper case |
| | | if (word.size() == 1) { |
| | | word[0] = word[0] - 32; |
| | | } |
| | | // if (word.size() == 1) { |
| | | // word[0] = word[0] - 32; |
| | | // } |
| | | |
| | | if (pre_english_len > 1) { |
| | | words.push_back(" "); |
| | |
| | | { |
| | | return vocab.size(); |
| | | } |
| | | |
| | | } // namespace funasr |