python/FunASR-XL.git

			@@ -14,7 +14,13 @@
			/// @brief Builds the vocabulary from a JSON token file.
			/// @param filename Path of the token file; handed straight to
			///        LoadVocabFromJson, which opens the file itself and terminates
			///        the process if it cannot be opened.
			Vocab::Vocab(const char *filename)
			{
				// No stream is opened here: the loader owns all file handling.
				LoadVocabFromJson(filename);
			}
			/// @brief Builds the vocabulary from a token file plus a lexicon file.
			/// @param filename Path of the token file, forwarded to LoadVocabFromYaml
			///        (defined outside this view — presumably it takes a C-string
			///        path like the other loaders; confirm against the header).
			/// @param lex_file Path of the tab-separated lexicon file, forwarded to
			///        LoadLex.
			Vocab::Vocab(const char *filename, const char *lex_file)
			{
				// Both paths must be pointers: LoadLex takes `const char*`, so a
				// by-value `const char` parameter cannot be forwarded to it.
				LoadVocabFromYaml(filename);
				LoadLex(lex_file);
			}
			Vocab::~Vocab()
			{
			@@ -37,11 +43,56 @@
			}
			}

			int Vocab::GetIdByToken(const std::string &token) {
			if (token_id.count(token)) {
			return token_id[token];
			/// @brief Loads the token list from a JSON array file.
			///
			/// Every element of the array is appended to `vocab` and recorded in
			/// `token_id` under its zero-based position in the array. If a token
			/// occurs more than once, `token_id` keeps the index of its last
			/// occurrence.
			///
			/// @param filename Path of the JSON token file.
			/// @note Logs a message and calls `exit(-1)` when the file cannot be opened.
			/// @note NOTE(review): malformed JSON is not handled here; the stream
			///       extraction is expected to throw in that case — confirm whether
			///       that should also be reported through the log.
			void Vocab::LoadVocabFromJson(const char* filename){
				nlohmann::json json_array;
				std::ifstream file(filename);
				if (!file.is_open()) {
					LOG(INFO) << "Error loading token file, token file error or not exist.";
					exit(-1);
				}
				file >> json_array;
				file.close();

				// Populate both lookup directions; this must run after a successful
				// parse, so nothing may return between the parse and this loop.
				int i = 0;
				for (const auto& element : json_array) {
					vocab.push_back(element);
					token_id[element] = i;
					i++;
				}
			}

			/// @brief Reads a tab-separated lexicon file into `lex_map`.
			///
			/// Each line is split at its first tab: the text before it is the key,
			/// everything after it is the value. Lines with no tab, an empty key or
			/// an empty value are ignored. A key seen again overwrites the earlier
			/// entry. If the file cannot be opened, no entries are added.
			///
			/// @param filename Path of the lexicon file.
			void Vocab::LoadLex(const char* filename){
				std::ifstream lex_stream(filename);
				std::string row;
				while (std::getline(lex_stream, row)) {
					const std::string::size_type tab_pos = row.find('\t');
					if (tab_pos == std::string::npos) {
						// No separator means there is no value part to store.
						continue;
					}

					const std::string word = row.substr(0, tab_pos);
					const std::string entry = row.substr(tab_pos + 1);
					if (word.empty() || entry.empty()) {
						continue;
					}
					lex_map[word] = entry;
				}
				// lex_stream is closed by its destructor when it goes out of scope.
			}

			/// @brief Looks up the lexicon entry stored for a word.
			/// @param word Key to search for in `lex_map`.
			/// @return The mapped value, or an empty string when `word` has no entry.
			string Vocab::Word2Lex(const std::string &word) const {
				const auto entry = lex_map.find(word);
				if (entry == lex_map.end()) {
					return "";
				}
				return entry->second;
			}

			/// @brief Returns the id recorded for a token.
			/// @param token Token text to search for in `token_id`.
			/// @return The stored id, or -1 when the token is not present.
			int Vocab::GetIdByToken(const std::string &token) const {
				const auto match = token_id.find(token);
				if (match == token_id.end()) {
					return -1;
				}
				return match->second;
			}

			void Vocab::Vector2String(vector<int> in, std::vector<std::string> &preds)