From 28a19dbc4e85d3b8a4ec2ef7483bba64d422b43f Mon Sep 17 00:00:00 2001
From: aky15 <ankeyu.aky@11.17.44.249>
Date: Wed, 12 Apr 2023 18:03:06 +0800
Subject: [PATCH] Merge remote-tracking branch 'origin/main' into dev_aky
---
funasr/version.txt | 2
funasr/runtime/grpc/paraformer_server.cc | 57 ++---
funasr/runtime/onnxruntime/src/CMakeLists.txt | 28 +-
funasr/runtime/onnxruntime/tester/CMakeLists.txt | 2
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py | 93 ++++++-
funasr/runtime/python/grpc/grpc_main_client.py | 62 +++++
funasr/tasks/abs_task.py | 9
funasr/runtime/onnxruntime/src/paraformer_onnx.cpp | 28 +
funasr/runtime/onnxruntime/tester/tester.cpp | 58 ----
funasr/runtime/onnxruntime/include/libfunasrapi.h | 77 +++++++
funasr/runtime/onnxruntime/src/commonfunc.h | 4
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py | 3
/dev/null | 77 -------
funasr/runtime/onnxruntime/tester/tester_rtf.cpp | 18
funasr/runtime/grpc/CMakeLists.txt | 2
funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py | 23 -
funasr/runtime/onnxruntime/src/FeatureExtract.cpp | 28 -
funasr/runtime/onnxruntime/src/FeatureExtract.h | 13
funasr/runtime/onnxruntime/src/libfunasrapi.cpp | 42 +-
funasr/runtime/onnxruntime/src/paraformer_onnx.h | 7
funasr/runtime/onnxruntime/src/precomp.h | 2
funasr/runtime/grpc/paraformer_server.h | 12
funasr/runtime/python/libtorch/funasr_torch/utils/utils.py | 13
23 files changed, 347 insertions(+), 313 deletions(-)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py
index 96db5f9..ce8988e 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer.py
@@ -23,8 +23,7 @@
batch_size=1
)
audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
- inference_pipline(audio_in=audio_in, param_dict={"decoding_model": "offline"})
-
+ inference_pipline(audio_in=audio_in)
def modelscope_infer(params):
# prepare for multi-GPU decoding
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py
index 74691f0..1e9c4d1 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/infer_after_finetune.py
@@ -2,52 +2,103 @@
import os
import shutil
+from multiprocessing import Pool
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from funasr.utils.compute_wer import compute_wer
+def modelscope_infer_after_finetune_core(model_dir, output_dir, split_dir, njob, idx):
+ output_dir_job = os.path.join(output_dir, "output.{}".format(idx))
+ gpu_id = (int(idx) - 1) // njob
+ if "CUDA_VISIBLE_DEVICES" in os.environ.keys():
+ gpu_list = os.environ['CUDA_VISIBLE_DEVICES'].split(",")
+ os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_list[gpu_id])
+ else:
+ os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model=model_dir,
+ output_dir=output_dir_job,
+ batch_size=1
+ )
+ audio_in = os.path.join(split_dir, "wav.{}.scp".format(idx))
+ inference_pipeline(audio_in=audio_in)
+
def modelscope_infer_after_finetune(params):
- # prepare for decoding
+ # prepare for multi-GPU decoding
+ model_dir = params["model_dir"]
pretrained_model_path = os.path.join(os.environ["HOME"], ".cache/modelscope/hub", params["modelscope_model_name"])
for file_name in params["required_files"]:
if file_name == "configuration.json":
with open(os.path.join(pretrained_model_path, file_name)) as f:
config_dict = json.load(f)
config_dict["model"]["am_model_name"] = params["decoding_model_name"]
- with open(os.path.join(params["output_dir"], "configuration.json"), "w") as f:
+ with open(os.path.join(model_dir, "configuration.json"), "w") as f:
json.dump(config_dict, f, indent=4, separators=(',', ': '))
else:
shutil.copy(os.path.join(pretrained_model_path, file_name),
- os.path.join(params["output_dir"], file_name))
- decoding_path = os.path.join(params["output_dir"], "decode_results")
- if os.path.exists(decoding_path):
- shutil.rmtree(decoding_path)
- os.mkdir(decoding_path)
+ os.path.join(model_dir, file_name))
+ ngpu = params["ngpu"]
+ njob = params["njob"]
+ output_dir = params["output_dir"]
+ if os.path.exists(output_dir):
+ shutil.rmtree(output_dir)
+ os.mkdir(output_dir)
+ split_dir = os.path.join(output_dir, "split")
+ os.mkdir(split_dir)
+ nj = ngpu * njob
+ wav_scp_file = os.path.join(params["data_dir"], "wav.scp")
+ with open(wav_scp_file) as f:
+ lines = f.readlines()
+ num_lines = len(lines)
+ num_job_lines = num_lines // nj
+ start = 0
+ for i in range(nj):
+ end = start + num_job_lines
+ file = os.path.join(split_dir, "wav.{}.scp".format(str(i + 1)))
+ with open(file, "w") as f:
+ if i == nj - 1:
+ f.writelines(lines[start:])
+ else:
+ f.writelines(lines[start:end])
+ start = end
- # decoding
- inference_pipeline = pipeline(
- task=Tasks.auto_speech_recognition,
- model=params["output_dir"],
- output_dir=decoding_path,
- batch_size=1
- )
- audio_in = os.path.join(params["data_dir"], "wav.scp")
- inference_pipeline(audio_in=audio_in, param_dict={"decoding_model": "offline"})
+ p = Pool(nj)
+ for i in range(nj):
+ p.apply_async(modelscope_infer_after_finetune_core,
+ args=(model_dir, output_dir, split_dir, njob, str(i + 1)))
+ p.close()
+ p.join()
- # computer CER if GT text is set
+ # combine decoding results
+ best_recog_path = os.path.join(output_dir, "1best_recog")
+ os.mkdir(best_recog_path)
+ files = ["text", "token", "score"]
+ for file in files:
+ with open(os.path.join(best_recog_path, file), "w") as f:
+ for i in range(nj):
+ job_file = os.path.join(output_dir, "output.{}/1best_recog".format(str(i + 1)), file)
+ with open(job_file) as f_job:
+ lines = f_job.readlines()
+ f.writelines(lines)
+
+ # If text exists, compute CER
text_in = os.path.join(params["data_dir"], "text")
if os.path.exists(text_in):
- text_proc_file = os.path.join(decoding_path, "1best_recog/text")
- compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer"))
-
+ text_proc_file = os.path.join(best_recog_path, "token")
+ compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer"))
if __name__ == '__main__':
params = {}
params["modelscope_model_name"] = "damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline"
params["required_files"] = ["am.mvn", "decoding.yaml", "configuration.json"]
- params["output_dir"] = "./checkpoint"
+ params["model_dir"] = "./checkpoint"
+ params["output_dir"] = "./results"
params["data_dir"] = "./data/test"
params["decoding_model_name"] = "20epoch.pb"
+ params["ngpu"] = 1
+ params["njob"] = 1
modelscope_infer_after_finetune(params)
+
diff --git a/funasr/runtime/grpc/CMakeLists.txt b/funasr/runtime/grpc/CMakeLists.txt
index 1d5d9a9..c7727d5 100644
--- a/funasr/runtime/grpc/CMakeLists.txt
+++ b/funasr/runtime/grpc/CMakeLists.txt
@@ -74,7 +74,7 @@
"${_target}.cc")
target_link_libraries(${_target}
rg_grpc_proto
- rapidasr
+ funasr
${EXTRA_LIBS}
${_REFLECTION}
${_GRPC_GRPCPP}
diff --git a/funasr/runtime/grpc/paraformer_server.cc b/funasr/runtime/grpc/paraformer_server.cc
index f2ab4e0..2bfd3e5 100644
--- a/funasr/runtime/grpc/paraformer_server.cc
+++ b/funasr/runtime/grpc/paraformer_server.cc
@@ -15,7 +15,6 @@
#include "paraformer.grpc.pb.h"
#include "paraformer_server.h"
-
using grpc::Server;
using grpc::ServerBuilder;
using grpc::ServerContext;
@@ -24,37 +23,14 @@
using grpc::ServerWriter;
using grpc::Status;
-
using paraformer::Request;
using paraformer::Response;
using paraformer::ASR;
ASRServicer::ASRServicer(const char* model_path, int thread_num, bool quantize) {
- AsrHanlde=RapidAsrInit(model_path, thread_num, quantize);
+ AsrHanlde=FunASRInit(model_path, thread_num, quantize);
std::cout << "ASRServicer init" << std::endl;
init_flag = 0;
-}
-
-void ASRServicer::clear_states(const std::string& user) {
- clear_buffers(user);
- clear_transcriptions(user);
-}
-
-void ASRServicer::clear_buffers(const std::string& user) {
- if (client_buffers.count(user)) {
- client_buffers.erase(user);
- }
-}
-
-void ASRServicer::clear_transcriptions(const std::string& user) {
- if (client_transcription.count(user)) {
- client_transcription.erase(user);
- }
-}
-
-void ASRServicer::disconnect(const std::string& user) {
- clear_states(user);
- std::cout << "Disconnecting user: " << user << std::endl;
}
grpc::Status ASRServicer::Recognize(
@@ -62,10 +38,20 @@
grpc::ServerReaderWriter<Response, Request>* stream) {
Request req;
+ std::unordered_map<std::string, std::string> client_buffers;
+ std::unordered_map<std::string, std::string> client_transcription;
+
while (stream->Read(&req)) {
if (req.isend()) {
std::cout << "asr end" << std::endl;
- disconnect(req.user());
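+ // Disconnect: erase this user's buffered audio and transcription (inlined from the removed disconnect() helper).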
+ if (client_buffers.count(req.user())) {
+ client_buffers.erase(req.user());
+ }
+ if (client_transcription.count(req.user())) {
+ client_transcription.erase(req.user());
+ }
+
Response res;
res.set_sentence(
R"({"success": true, "detail": "asr end"})"
@@ -103,8 +89,14 @@
auto& buf = client_buffers[req.user()];
buf.insert(buf.end(), req.audio_data().begin(), req.audio_data().end());
}
- std::string tmp_data = this->client_buffers[req.user()];
- this->clear_states(req.user());
+ std::string tmp_data = client_buffers[req.user()];
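+ // Clear this user's state (inlined from the removed clear_states() helper); tmp_data already holds a copy of the audio.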
+ if (client_buffers.count(req.user())) {
+ client_buffers.erase(req.user());
+ }
+ if (client_transcription.count(req.user())) {
+ client_transcription.erase(req.user());
+ }
Response res;
res.set_sentence(
@@ -133,14 +125,11 @@
res.set_user(req.user());
res.set_action("finish");
res.set_language(req.language());
-
-
-
stream->Write(res);
}
else {
- RPASR_RESULT Result= RapidAsrRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL);
- std::string asr_result = ((RPASR_RECOG_RESULT*)Result)->msg;
+ FUNASR_RESULT Result= FunASRRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL);
+ std::string asr_result = ((FUNASR_RECOG_RESULT*)Result)->msg;
auto end_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
std::string delay_str = std::to_string(end_time - begin_time);
@@ -154,7 +143,6 @@
res.set_user(req.user());
res.set_action("finish");
res.set_language(req.language());
-
stream->Write(res);
}
@@ -172,7 +160,6 @@
}
return Status::OK;
}
-
void RunServer(const std::string& port, int thread_num, const char* model_path, bool quantize) {
std::string server_address;
diff --git a/funasr/runtime/grpc/paraformer_server.h b/funasr/runtime/grpc/paraformer_server.h
index e42e041..dba1e45 100644
--- a/funasr/runtime/grpc/paraformer_server.h
+++ b/funasr/runtime/grpc/paraformer_server.h
@@ -15,7 +15,7 @@
#include <chrono>
#include "paraformer.grpc.pb.h"
-#include "librapidasrapi.h"
+#include "libfunasrapi.h"
using grpc::Server;
@@ -35,22 +35,16 @@
{
std::string msg;
float snippet_time;
-}RPASR_RECOG_RESULT;
+}FUNASR_RECOG_RESULT;
class ASRServicer final : public ASR::Service {
private:
int init_flag;
- std::unordered_map<std::string, std::string> client_buffers;
- std::unordered_map<std::string, std::string> client_transcription;
public:
ASRServicer(const char* model_path, int thread_num, bool quantize);
- void clear_states(const std::string& user);
- void clear_buffers(const std::string& user);
- void clear_transcriptions(const std::string& user);
- void disconnect(const std::string& user);
grpc::Status Recognize(grpc::ServerContext* context, grpc::ServerReaderWriter<Response, Request>* stream);
- RPASR_HANDLE AsrHanlde;
+ FUNASR_HANDLE AsrHanlde;
};
diff --git a/funasr/runtime/onnxruntime/include/libfunasrapi.h b/funasr/runtime/onnxruntime/include/libfunasrapi.h
new file mode 100644
index 0000000..6e81fa9
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/libfunasrapi.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#ifdef WIN32
+#ifdef _FUNASR_API_EXPORT
+#define _FUNASRAPI __declspec(dllexport)
+#else
+#define _FUNASRAPI __declspec(dllimport)
+#endif
+#else
+#define _FUNASRAPI
+#endif
+
+#ifndef _WIN32
+#define FUNASR_CALLBCK_PREFIX __attribute__((__stdcall__))
+#else
+#define FUNASR_CALLBCK_PREFIX __stdcall
+#endif
+
+#ifdef __cplusplus
+
+extern "C" {
+#endif
+
+typedef void* FUNASR_HANDLE;
+typedef void* FUNASR_RESULT;
+typedef unsigned char FUNASR_BOOL;
+
+#define FUNASR_TRUE 1
+#define FUNASR_FALSE 0
+#define QM_DEFAULT_THREAD_NUM 4
+
+typedef enum
+{
+ RASR_NONE=-1,
+ RASRM_CTC_GREEDY_SEARCH=0,
+ RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
+ RASRM_ATTENSION_RESCORING = 2,
+
+}FUNASR_MODE;
+
+typedef enum {
+ FUNASR_MODEL_PADDLE = 0,
+ FUNASR_MODEL_PADDLE_2 = 1,
+ FUNASR_MODEL_K2 = 2,
+ FUNASR_MODEL_PARAFORMER = 3,
+
+}FUNASR_MODEL_TYPE;
+
+typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
+
+// Public C APIs for FunASR
+_FUNASRAPI FUNASR_HANDLE FunASRInit(const char* szModelDir, int nThread, bool quantize);
+
+
+// If no fnCallback is supplied, pass NULL.
+_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
+
+_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
+
+_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
+
+_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
+
+_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT Result,int nIndex);
+
+_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT Result);
+
+_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT Result);
+
+_FUNASRAPI void FunASRUninit(FUNASR_HANDLE Handle);
+
+_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT Result);
+
+#ifdef __cplusplus
+
+}
+#endif
diff --git a/funasr/runtime/onnxruntime/include/librapidasrapi.h b/funasr/runtime/onnxruntime/include/librapidasrapi.h
deleted file mode 100644
index 918e574..0000000
--- a/funasr/runtime/onnxruntime/include/librapidasrapi.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#pragma once
-
-#ifdef WIN32
-#ifdef _RPASR_API_EXPORT
-#define _RAPIDASRAPI __declspec(dllexport)
-#else
-#define _RAPIDASRAPI __declspec(dllimport)
-#endif
-#else
-#define _RAPIDASRAPI
-#endif
-
-#ifndef _WIN32
-#define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__))
-#else
-#define RPASR_CALLBCK_PREFIX __stdcall
-#endif
-
-#ifdef __cplusplus
-
-extern "C" {
-#endif
-
-typedef void* RPASR_HANDLE;
-typedef void* RPASR_RESULT;
-typedef unsigned char RPASR_BOOL;
-
-#define RPASR_TRUE 1
-#define RPASR_FALSE 0
-#define QM_DEFAULT_THREAD_NUM 4
-
-typedef enum
-{
- RASR_NONE=-1,
- RASRM_CTC_GREEDY_SEARCH=0,
- RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
- RASRM_ATTENSION_RESCORING = 2,
-
-}RPASR_MODE;
-
-typedef enum {
- RPASR_MODEL_PADDLE = 0,
- RPASR_MODEL_PADDLE_2 = 1,
- RPASR_MODEL_K2 = 2,
- RPASR_MODEL_PARAFORMER = 3,
-
-}RPASR_MODEL_TYPE;
-
-typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
-
-// APIs for qmasr
-_RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread, bool quantize);
-
-
-// if not give a fnCallback ,it should be NULL
-_RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
-
-_RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result);
-
-_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result);
-
-_RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle);
-
-_RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
-
-#ifdef __cplusplus
-
-}
-#endif
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/CMakeLists.txt b/funasr/runtime/onnxruntime/src/CMakeLists.txt
index 2a281eb..c07aac5 100644
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -6,38 +6,38 @@
# message("${files}")
-add_library(rapidasr ${files})
+add_library(funasr ${files})
if(WIN32)
set(EXTRA_LIBS libfftw3f-3 yaml-cpp)
if(CMAKE_CL_64)
- target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
+ target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
else()
- target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
+ target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
endif()
- target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
+ target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
- target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
+ target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
else()
set(EXTRA_LIBS fftw3f pthread yaml-cpp)
- target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
- target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")
+ target_include_directories(funasr PUBLIC "/usr/local/opt/fftw/include")
+ target_link_directories(funasr PUBLIC "/usr/local/opt/fftw/lib")
- target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include")
- target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib")
+ target_include_directories(funasr PUBLIC "/usr/local/opt/openblas/include")
+ target_link_directories(funasr PUBLIC "/usr/local/opt/openblas/lib")
- target_include_directories(rapidasr PUBLIC "/usr/include")
- target_link_directories(rapidasr PUBLIC "/usr/lib64")
+ target_include_directories(funasr PUBLIC "/usr/include")
+ target_link_directories(funasr PUBLIC "/usr/lib64")
- target_include_directories(rapidasr PUBLIC ${FFTW3F_INCLUDE_DIR})
- target_link_directories(rapidasr PUBLIC ${FFTW3F_LIBRARY_DIR})
+ target_include_directories(funasr PUBLIC ${FFTW3F_INCLUDE_DIR})
+ target_link_directories(funasr PUBLIC ${FFTW3F_LIBRARY_DIR})
include_directories(${ONNXRUNTIME_DIR}/include)
endif()
include_directories(${CMAKE_SOURCE_DIR}/include)
-target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS})
+target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp b/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
index 1b0c3c4..6d2826a 100644
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
+++ b/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
@@ -5,14 +5,10 @@
FeatureExtract::FeatureExtract(int mode) : mode(mode)
{
- fftw_init();
}
FeatureExtract::~FeatureExtract()
{
- fftwf_free(fft_input);
- fftwf_free(fft_out);
- fftwf_destroy_plan(p);
}
void FeatureExtract::reset()
@@ -26,34 +22,25 @@
return fqueue.size();
}
-void FeatureExtract::fftw_init()
+void FeatureExtract::insert(fftwf_plan plan, float *din, int len, int flag)
{
- int fft_size = 512;
- fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
- fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
+ float* fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
+ fftwf_complex* fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
memset(fft_input, 0, sizeof(float) * fft_size);
- p = fftwf_plan_dft_r2c_1d(fft_size, fft_input, fft_out, FFTW_ESTIMATE);
-}
-void FeatureExtract::insert(float *din, int len, int flag)
-{
const float *window = (const float *)&window_hex;
if (mode == 3)
window = (const float *)&window_hamm_hex;
-
- int window_size = 400;
- int fft_size = 512;
- int window_shift = 160;
speech.load(din, len);
int i, j;
float tmp_feature[80];
if (mode == 0 || mode == 2 || mode == 3) {
- int ll = (speech.size() - 400) / 160 + 1;
+ int ll = (speech.size() - window_size) / window_shift + 1;
fqueue.reinit(ll);
}
- for (i = 0; i <= speech.size() - 400; i = i + window_shift) {
+ for (i = 0; i <= speech.size() - window_size; i = i + window_shift) {
float tmp_mean = 0;
for (j = 0; j < window_size; j++) {
tmp_mean += speech[i + j];
@@ -70,7 +57,7 @@
pre_val = cur_val;
}
- fftwf_execute(p);
+ fftwf_execute_dft_r2c(plan, fft_input, fft_out);
melspect((float *)fft_out, tmp_feature);
int tmp_flag = S_MIDDLE;
@@ -80,6 +67,8 @@
fqueue.push(tmp_feature, tmp_flag);
}
speech.update(i);
+ fftwf_free(fft_input);
+ fftwf_free(fft_out);
}
bool FeatureExtract::fetch(Tensor<float> *&dout)
@@ -128,7 +117,6 @@
void FeatureExtract::melspect(float *din, float *dout)
{
float fftmag[256];
-// float tmp;
const float *melcoe = (const float *)melcoe_hex;
int i;
for (i = 0; i < 256; i++) {
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.h b/funasr/runtime/onnxruntime/src/FeatureExtract.h
index f16ea3a..8296253 100644
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.h
+++ b/funasr/runtime/onnxruntime/src/FeatureExtract.h
@@ -14,12 +14,11 @@
SpeechWrap speech;
FeatureQueue fqueue;
int mode;
+ int fft_size = 512;
+ int window_size = 400;
+ int window_shift = 160;
- float *fft_input;
- fftwf_complex *fft_out;
- fftwf_plan p;
-
- void fftw_init();
+ //void fftw_init();
void melspect(float *din, float *dout);
void global_cmvn(float *din);
@@ -27,9 +26,9 @@
FeatureExtract(int mode);
~FeatureExtract();
int size();
- int status();
+ //int status();
void reset();
- void insert(float *din, int len, int flag);
+ void insert(fftwf_plan plan, float *din, int len, int flag);
bool fetch(Tensor<float> *&dout);
};
diff --git a/funasr/runtime/onnxruntime/src/commonfunc.h b/funasr/runtime/onnxruntime/src/commonfunc.h
index 11c234e..5198030 100644
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@@ -5,7 +5,7 @@
{
std::string msg;
float snippet_time;
-}RPASR_RECOG_RESULT;
+}FUNASR_RECOG_RESULT;
#ifdef _WIN32
@@ -53,4 +53,4 @@
}
}
-}
\ No newline at end of file
+}
diff --git a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
similarity index 64%
rename from funasr/runtime/onnxruntime/src/librapidasrapi.cpp
rename to funasr/runtime/onnxruntime/src/libfunasrapi.cpp
index 62f47a5..0d77d20 100644
--- a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp
+++ b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
@@ -5,13 +5,13 @@
#endif
// APIs for qmasr
- _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum, bool quantize)
+ _FUNASRAPI FUNASR_HANDLE FunASRInit(const char* szModelDir, int nThreadNum, bool quantize)
{
Model* mm = create_model(szModelDir, nThreadNum, quantize);
return mm;
}
- _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
{
Model* pRecogObj = (Model*)handle;
if (!pRecogObj)
@@ -25,12 +25,12 @@
float* buff;
int len;
int flag=0;
- RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+ FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
pResult->snippet_time = audio.get_time_len();
int nStep = 0;
int nTotal = audio.get_queue_size();
while (audio.fetch(buff, len, flag) > 0) {
- pRecogObj->reset();
+ //pRecogObj->reset();
string msg = pRecogObj->forward(buff, len, flag);
pResult->msg += msg;
nStep++;
@@ -41,7 +41,7 @@
return pResult;
}
- _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
{
Model* pRecogObj = (Model*)handle;
if (!pRecogObj)
@@ -55,12 +55,12 @@
float* buff;
int len;
int flag = 0;
- RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+ FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
pResult->snippet_time = audio.get_time_len();
int nStep = 0;
int nTotal = audio.get_queue_size();
while (audio.fetch(buff, len, flag) > 0) {
- pRecogObj->reset();
+ //pRecogObj->reset();
string msg = pRecogObj->forward(buff, len, flag);
pResult->msg += msg;
nStep++;
@@ -71,7 +71,7 @@
return pResult;
}
- _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
{
Model* pRecogObj = (Model*)handle;
if (!pRecogObj)
@@ -85,12 +85,12 @@
float* buff;
int len;
int flag = 0;
- RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+ FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
pResult->snippet_time = audio.get_time_len();
int nStep = 0;
int nTotal = audio.get_queue_size();
while (audio.fetch(buff, len, flag) > 0) {
- pRecogObj->reset();
+ //pRecogObj->reset();
string msg = pRecogObj->forward(buff, len, flag);
pResult->msg += msg;
nStep++;
@@ -101,7 +101,7 @@
return pResult;
}
- _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
{
Model* pRecogObj = (Model*)handle;
if (!pRecogObj)
@@ -117,10 +117,10 @@
int flag = 0;
int nStep = 0;
int nTotal = audio.get_queue_size();
- RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+ FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
pResult->snippet_time = audio.get_time_len();
while (audio.fetch(buff, len, flag) > 0) {
- pRecogObj->reset();
+ //pRecogObj->reset();
string msg = pRecogObj->forward(buff, len, flag);
pResult->msg+= msg;
nStep++;
@@ -131,7 +131,7 @@
return pResult;
}
- _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result)
+ _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT Result)
{
if (!Result)
return 0;
@@ -140,32 +140,32 @@
}
- _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result)
+ _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT Result)
{
if (!Result)
return 0.0f;
- return ((RPASR_RECOG_RESULT*)Result)->snippet_time;
+ return ((FUNASR_RECOG_RESULT*)Result)->snippet_time;
}
- _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex)
+ _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT Result,int nIndex)
{
- RPASR_RECOG_RESULT * pResult = (RPASR_RECOG_RESULT*)Result;
+ FUNASR_RECOG_RESULT * pResult = (FUNASR_RECOG_RESULT*)Result;
if(!pResult)
return nullptr;
return pResult->msg.c_str();
}
- _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result)
+ _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT Result)
{
if (Result)
{
- delete (RPASR_RECOG_RESULT*)Result;
+ delete (FUNASR_RECOG_RESULT*)Result;
}
}
- _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle)
+ _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle)
{
Model* pRecogObj = (Model*)handle;
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
index a49069c..678cdf6 100644
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@@ -18,7 +18,10 @@
cmvn_path = pathAppend(path, "am.mvn");
config_path = pathAppend(path, "config.yaml");
- fe = new FeatureExtract(3);
+ fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
+ fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
+ memset(fft_input, 0, sizeof(float) * fft_size);
+ plan = fftwf_plan_dft_r2c_1d(fft_size, fft_input, fft_out, FFTW_ESTIMATE);
//sessionOptions.SetInterOpNumThreads(1);
sessionOptions.SetIntraOpNumThreads(nNumThread);
@@ -52,8 +55,6 @@
ModelImp::~ModelImp()
{
- if(fe)
- delete fe;
if (m_session)
{
delete m_session;
@@ -61,11 +62,15 @@
}
if(vocab)
delete vocab;
+ fftwf_free(fft_input);
+ fftwf_free(fft_out);
+ fftwf_destroy_plan(plan);
+ fftwf_cleanup();
}
void ModelImp::reset()
{
- fe->reset();
+ printf("Not Imp!!!!!!\n");
}
void ModelImp::apply_lfr(Tensor<float>*& din)
@@ -159,9 +164,10 @@
string ModelImp::forward(float* din, int len, int flag)
{
-
Tensor<float>* in;
- fe->insert(din, len, flag);
+ FeatureExtract* fe = new FeatureExtract(3);
+ fe->reset();
+ fe->insert(plan, din, len, flag);
fe->fetch(in);
apply_lfr(in);
apply_cmvn(in);
@@ -192,7 +198,6 @@
auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
-
int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
float* floatData = outputTensor[0].GetTensorMutableData<float>();
auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
@@ -203,9 +208,14 @@
result = "";
}
-
- if(in)
+ if(in){
delete in;
+ in = nullptr;
+ }
+ if(fe){
+ delete fe;
+ fe = nullptr;
+ }
return result;
}
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.h b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
index 395c328..e763be2 100644
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
@@ -8,7 +8,10 @@
class ModelImp : public Model {
private:
- FeatureExtract* fe;
+ int fft_size=512;
+ float *fft_input;
+ fftwf_complex *fft_out;
+ fftwf_plan plan;
Vocab* vocab;
vector<float> means_list;
@@ -34,8 +37,6 @@
vector<string> m_strInputNames, m_strOutputNames;
vector<const char*> m_szInputNames;
vector<const char*> m_szOutputNames;
- //string m_strInputName, m_strInputNameLen;
- //string m_strOutputName, m_strOutputNameLen;
public:
ModelImp(const char* path, int nNumThread=0, bool quantize=false);
diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h
index c9f43bf..678a3e4 100644
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -46,7 +46,7 @@
#include <Audio.h>
#include "Model.h"
#include "paraformer_onnx.h"
-#include "librapidasrapi.h"
+#include "libfunasrapi.h"
using namespace paraformer;
diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
index f66319d..e3224e3 100644
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
@@ -8,7 +8,7 @@
endif()
endif()
-set(EXTRA_LIBS rapidasr)
+set(EXTRA_LIBS funasr)
include_directories(${CMAKE_SOURCE_DIR}/include)
diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/tester/tester.cpp
index 35d534f..7257603 100644
--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester.cpp
@@ -5,7 +5,7 @@
#include <win_func.h>
#endif
-#include "librapidasrapi.h"
+#include "libfunasrapi.h"
#include <iostream>
#include <fstream>
@@ -26,7 +26,7 @@
// is quantize
bool quantize = false;
istringstream(argv[3]) >> boolalpha >> quantize;
- RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum, quantize);
+ FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
if (!AsrHanlde)
{
@@ -42,62 +42,22 @@
gettimeofday(&start, NULL);
float snippet_time = 0.0f;
- RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
+ FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
gettimeofday(&end, NULL);
if (Result)
{
- string msg = RapidAsrGetResult(Result, 0);
+ string msg = FunASRGetResult(Result, 0);
setbuf(stdout, NULL);
- cout << "Result: \"";
- cout << msg << "\"." << endl;
- snippet_time = RapidAsrGetRetSnippetTime(Result);
- RapidAsrFreeResult(Result);
+ printf("Result: %s \n", msg.c_str());
+ snippet_time = FunASRGetRetSnippetTime(Result);
+ FunASRFreeResult(Result);
}
else
{
cout <<"no return data!";
}
-
- //char* buff = nullptr;
- //int len = 0;
- //ifstream ifs(argv[2], std::ios::binary | std::ios::in);
- //if (ifs.is_open())
- //{
- // ifs.seekg(0, std::ios::end);
- // len = ifs.tellg();
- // ifs.seekg(0, std::ios::beg);
-
- // buff = new char[len];
-
- // ifs.read(buff, len);
-
-
- // //RPASR_RESULT Result = RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL);
-
- // RPASR_RESULT Result=RapidAsrRecogPCMBuffer(AsrHanlde, buff,len, RASR_NONE, NULL);
- // //RPASR_RESULT Result = RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL);
- // gettimeofday(&end, NULL);
- //
- // if (Result)
- // {
- // string msg = RapidAsrGetResult(Result, 0);
- // setbuf(stdout, NULL);
- // cout << "Result: \"";
- // cout << msg << endl;
- // cout << "\"." << endl;
- // snippet_time = RapidAsrGetRetSnippetTime(Result);
- // RapidAsrFreeResult(Result);
- // }
- // else
- // {
- // cout <<"no return data!";
- // }
-
- //
- //delete[]buff;
- //}
printf("Audio length %lfs.\n", (double)snippet_time);
seconds = (end.tv_sec - start.tv_sec);
@@ -105,9 +65,9 @@
printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
- RapidAsrUninit(AsrHanlde);
+ FunASRUninit(AsrHanlde);
return 0;
}
-
\ No newline at end of file
+
diff --git a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
index 9651900..dd79887 100644
--- a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
@@ -5,7 +5,7 @@
#include <win_func.h>
#endif
-#include "librapidasrapi.h"
+#include "libfunasrapi.h"
#include <iostream>
#include <fstream>
@@ -47,7 +47,7 @@
bool quantize = false;
istringstream(argv[3]) >> boolalpha >> quantize;
- RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum, quantize);
+ FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
if (!AsrHanlde)
{
printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
@@ -61,7 +61,7 @@
// warm up
for (size_t i = 0; i < 30; i++)
{
- RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
+        FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL); if (Result) FunASRFreeResult(Result); // warm-up output is discarded: release it instead of leaking one result per iteration
}
// forward
@@ -72,19 +72,19 @@
for (size_t i = 0; i < wav_list.size(); i++)
{
gettimeofday(&start, NULL);
- RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
+ FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
total_time += taking_micros;
if(Result){
- string msg = RapidAsrGetResult(Result, 0);
- printf("Result: %s \n", msg);
+ string msg = FunASRGetResult(Result, 0);
+ printf("Result: %s \n", msg.c_str());
- snippet_time = RapidAsrGetRetSnippetTime(Result);
+ snippet_time = FunASRGetRetSnippetTime(Result);
total_length += snippet_time;
- RapidAsrFreeResult(Result);
+ FunASRFreeResult(Result);
}else{
cout <<"No return data!";
}
@@ -94,6 +94,6 @@
printf("total_time_comput %ld ms.\n", total_time / 1000);
printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
- RapidAsrUninit(AsrHanlde);
+ FunASRUninit(AsrHanlde);
return 0;
}
diff --git a/funasr/runtime/python/grpc/grpc_main_client.py b/funasr/runtime/python/grpc/grpc_main_client.py
new file mode 100644
index 0000000..b6491df
--- /dev/null
+++ b/funasr/runtime/python/grpc/grpc_main_client.py
@@ -0,0 +1,62 @@
+import grpc
+import json
+import time
+import asyncio
+import soundfile as sf
+import argparse
+
+from grpc_client import transcribe_audio_bytes
+from paraformer_pb2_grpc import ASRStub
+
+async def grpc_rec(wav_scp, grpc_uri, asr_user, language):
+    """Send the wav named by the 2nd field of each wav_scp line to grpc_uri in one request; print text and elapsed time."""
+    with grpc.insecure_channel(grpc_uri) as channel:
+        stub = ASRStub(channel)
+        for line in wav_scp:
+            fields = line.split()
+            if len(fields) < 2:  # blank or malformed scp line: skip it instead of raising IndexError
+                continue
+            wav, _ = sf.read(fields[1], dtype='int16')
+            b = time.time()
+            response = transcribe_audio_bytes(stub, wav.tobytes(), user=asr_user, language=language, speaking=False, isEnd=False)
+            resp = next(response)  # builtin next() rather than the Python 2 style .next() method
+            text = ''
+            if 'decoding' == resp.action:
+                resp = next(response)
+                if 'finish' == resp.action:  # NOTE(review): nesting rebuilt from whitespace-collapsed source -- confirm
+                    text = json.loads(resp.sentence)['text']
+            response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking=False, isEnd=True)  # kept bound so the call object stays referenced; its reply is not read here
+            print({'text': text, 'time': time.time() - b})
+
+async def test(args):
+    """Read the scp file at args.wav_scp and pass its lines to grpc_rec for the server at args.host:args.port."""
+    with open(args.wav_scp, "r") as scp_file:  # context manager closes the handle the original left open
+        await grpc_rec(scp_file.readlines(), '{}:{}'.format(args.host, args.port), args.user_allowed, language='zh-CN')
+
+if __name__ == '__main__':
+    # Script entry point: collect the connection, user and input-list options, then run the client once.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host",
+                        default="127.0.0.1",
+                        type=str,
+                        help="grpc server host ip")
+    parser.add_argument("--port",
+                        default=10108,
+                        type=int,
+                        help="grpc server port")
+    parser.add_argument("--user_allowed",
+                        default="project1_user1",
+                        type=str,
+                        help="allowed user for grpc client")
+    # NOTE(review): --sample_rate is parsed, but nothing visible in this script reads it -- confirm intent.
+    parser.add_argument("--sample_rate",
+                        default=16000,
+                        type=int,
+                        help="audio sample_rate from client")
+    parser.add_argument("--wav_scp",
+                        required=True,
+                        type=str,
+                        help="audio wav scp")
+    args = parser.parse_args()
+
+    asyncio.run(test(args))
diff --git a/funasr/runtime/python/libtorch/funasr_torch/utils/utils.py b/funasr/runtime/python/libtorch/funasr_torch/utils/utils.py
index cafc43b..86e78bc 100644
--- a/funasr/runtime/python/libtorch/funasr_torch/utils/utils.py
+++ b/funasr/runtime/python/libtorch/funasr_torch/utils/utils.py
@@ -23,9 +23,11 @@
):
check_argument_types()
- # self.token_list = self.load_token(token_path)
self.token_list = token_list
self.unk_symbol = token_list[-1]
+ self.token2id = {v: i for i, v in enumerate(self.token_list)}
+ self.unk_id = self.token2id[self.unk_symbol]
+
def get_num_vocabulary_size(self) -> int:
return len(self.token_list)
@@ -38,13 +40,8 @@
return [self.token_list[i] for i in integers]
def tokens2ids(self, tokens: Iterable[str]) -> List[int]:
- token2id = {v: i for i, v in enumerate(self.token_list)}
- if self.unk_symbol not in token2id:
- raise TokenIDConverterError(
- f"Unknown symbol '{self.unk_symbol}' doesn't exist in the token_list"
- )
- unk_id = token2id[self.unk_symbol]
- return [token2id.get(i, unk_id) for i in tokens]
+ """Return the id of each token; tokens absent from self.token2id map to self.unk_id."""
+ return [self.token2id.get(i, self.unk_id) for i in tokens]
class CharTokenizer():
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
index 0df954e..78c3f0d 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
@@ -24,21 +24,11 @@
):
check_argument_types()
- # self.token_list = self.load_token(token_path)
self.token_list = token_list
self.unk_symbol = token_list[-1]
+ self.token2id = {v: i for i, v in enumerate(self.token_list)}
+ self.unk_id = self.token2id[self.unk_symbol]
- # @staticmethod
- # def load_token(file_path: Union[Path, str]) -> List:
- # if not Path(file_path).exists():
- # raise TokenIDConverterError(f'The {file_path} does not exist.')
- #
- # with open(str(file_path), 'rb') as f:
- # token_list = pickle.load(f)
- #
- # if len(token_list) != len(set(token_list)):
- # raise TokenIDConverterError('The Token exists duplicated symbol.')
- # return token_list
def get_num_vocabulary_size(self) -> int:
return len(self.token_list)
@@ -51,13 +41,8 @@
return [self.token_list[i] for i in integers]
def tokens2ids(self, tokens: Iterable[str]) -> List[int]:
- token2id = {v: i for i, v in enumerate(self.token_list)}
- if self.unk_symbol not in token2id:
- raise TokenIDConverterError(
- f"Unknown symbol '{self.unk_symbol}' doesn't exist in the token_list"
- )
- unk_id = token2id[self.unk_symbol]
- return [token2id.get(i, unk_id) for i in tokens]
+ """Return the id of each token; tokens absent from self.token2id map to self.unk_id."""
+ return [self.token2id.get(i, self.unk_id) for i in tokens]
class CharTokenizer():
diff --git a/funasr/tasks/abs_task.py b/funasr/tasks/abs_task.py
index d4b8a72..e70b062 100644
--- a/funasr/tasks/abs_task.py
+++ b/funasr/tasks/abs_task.py
@@ -1582,10 +1582,11 @@
) -> AbsIterFactory:
assert check_argument_types()
- if args.frontend_conf is not None and "fs" in args.frontend_conf:
- dest_sample_rate = args.frontend_conf["fs"]
- else:
- dest_sample_rate = 16000
+        frontend_conf = getattr(args, "frontend_conf", None)  # args may not define frontend_conf at all
+        if frontend_conf is not None and "fs" in frontend_conf:
+            dest_sample_rate = frontend_conf["fs"]
+        else:
+            dest_sample_rate = 16000  # single fallback for every other path, so the name is always bound
dataset = ESPnetDataset(
iter_options.data_path_and_name_and_type,
diff --git a/funasr/version.txt b/funasr/version.txt
index 1c09c74..1d0ba9e 100644
--- a/funasr/version.txt
+++ b/funasr/version.txt
@@ -1 +1 @@
-0.3.3
+0.4.0
--
Gitblit v1.9.1