From 54931dd4e1a099d7d6f144c4e12e5453deb3aa26 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 28 Jun 2023 10:41:57 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
---
funasr/runtime/onnxruntime/src/tokenizer.h | 45 +++++++++++++++++++++++++++------------------
1 file changed, 27 insertions(+), 18 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/tokenizer.h b/funasr/runtime/onnxruntime/src/tokenizer.h
index d8424a2..3b1d1c5 100644
--- a/funasr/runtime/onnxruntime/src/tokenizer.h
+++ b/funasr/runtime/onnxruntime/src/tokenizer.h
@@ -1,27 +1,36 @@
-#pragma once
-#include "yaml-cpp/yaml.h"
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License (https://opensource.org/licenses/MIT)
+*/
+#pragma once
+#include <yaml-cpp/yaml.h>
+
+namespace funasr {
class CTokenizer {
private:
- bool m_Ready = false;
- vector<string> m_ID2Token,m_ID2Punc;
- map<string, int> m_Token2ID,m_Punc2ID;
+ bool m_ready = false;
+ vector<string> m_id2token,m_id2punc;
+ map<string, int> m_token2id,m_punc2id;
public:
- CTokenizer(const char* szYmlFile);
+ CTokenizer(const char* sz_yamlfile);
CTokenizer();
- bool OpenYaml(const char* szYmlFile);
- void read_yml(const YAML::Node& node);
- vector<string> ID2String(vector<int> Input);
- vector<int> String2IDs(vector<string> Input);
- int String2ID(string Input);
- vector<string> ID2Punc(vector<int> Input);
- string ID2Punc(int nPuncID);
- vector<int> Punc2IDs(vector<string> Input);
- vector<string> SplitChineseString(const string& strInfo);
- void strSplit(const string& str, const char split, vector<string>& res);
- void Tokenize(const char* strInfo, vector<string>& strOut, vector<int>& IDOut);
-
+ ~CTokenizer();
+ bool OpenYaml(const char* sz_yamlfile);
+ void ReadYaml(const YAML::Node& node);
+ vector<string> Id2String(vector<int> input);
+ vector<int> String2Ids(vector<string> input);
+ int String2Id(string input);
+ vector<string> Id2Punc(vector<int> input);
+ string Id2Punc(int n_punc_id);
+ vector<int> Punc2Ids(vector<string> input);
+ vector<string> SplitChineseString(const string& str_info);
+ void StrSplit(const string& str, const char split, vector<string>& res);
+ void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
+ bool IsPunc(string& Punc);
};
+
+} // namespace funasr
--
Gitblit v1.9.1