From a05e753d11d9c36983ec4e58c421dbcf86d1dcd4 Mon Sep 17 00:00:00 2001
From: Xian Shi <40013335+R1ckShi@users.noreply.github.com>
Date: 星期二, 17 十月 2023 16:47:27 +0800
Subject: [PATCH] Merge branch 'main' into dev_onnx
---
funasr/runtime/onnxruntime/src/seg_dict.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 54 insertions(+), 0 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/seg_dict.cpp b/funasr/runtime/onnxruntime/src/seg_dict.cpp
new file mode 100644
index 0000000..4e7dab4
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/seg_dict.cpp
@@ -0,0 +1,54 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License (https://opensource.org/licenses/MIT)
+*/
+#include "precomp.h"
+//#include "util.h"
+//#include "seg_dict.h"
+#include <glog/logging.h>
+
+#include <fstream>
+#include <iostream>
+#include <list>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+namespace funasr {
+// Builds the dictionary from `filename`: each line goes through split(line, '\t');
+// with at least two fields, field 0 is the key and split(field 1, ' ') is the
+// value stored in seg_dict (a later duplicate key overwrites). Other lines are skipped.
+// If the file cannot be opened, an error is logged and nothing is loaded.
+SegDict::SegDict(const char *filename)
+{
+    ifstream in(filename);
+    if (!in) {
+        LOG(ERROR) << filename << " open failed !!";
+        return;
+    }
+    string textline;
+    while (getline(in, textline)) {
+        std::vector<string> line_item = split(textline, '\t');
+        if (line_item.size() < 2) continue;  // no key/value pair on this line
+        // Assign split()'s result directly instead of copying key, field and token vector first.
+        seg_dict[line_item[0]] = split(line_item[1], ' ');
+    }
+    LOG(INFO) << "load seg dict successfully";
+}
+// Returns a copy of the tokens stored in seg_dict for `word`, or an empty vector if there is no entry.
+std::vector<std::string> SegDict::GetTokensByWord(const std::string &word) {
+    const auto entry = seg_dict.find(word);  // one lookup instead of count() + operator[]
+    if (entry != seg_dict.end())
+        return entry->second;
+    // No entry for this word: log it and return an empty token list.
+    LOG(INFO)<< word <<" is OOV!";
+    return std::vector<std::string>();
+}
+
+// Destructor: no explicit cleanup is written here, so it is
+// defined out of line as defaulted rather than with an empty body.
+SegDict::~SegDict() = default;
+
+
+} // namespace funasr
--
Gitblit v1.9.1