From 1819303f5e8cfc03f4c0ec2495571a54a186d34b Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 29 Oct 2024 11:40:18 +0800
Subject: [PATCH] support SenseVoiceSmall in 2pass mode
---
runtime/websocket/bin/websocket-server-2pass.cpp | 26 +++++++++++++++++++++-----
1 file changed, 21 insertions(+), 5 deletions(-)
diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp
index 8c8cab4..ff23e9d 100644
--- a/runtime/websocket/bin/websocket-server-2pass.cpp
+++ b/runtime/websocket/bin/websocket-server-2pass.cpp
@@ -111,7 +111,9 @@
int audio_fs,
std::string wav_format,
FUNASR_HANDLE& tpass_online_handle,
- FUNASR_DEC_HANDLE& decoder_handle) {
+ FUNASR_DEC_HANDLE& decoder_handle,
+ std::string svs_lang,
+ bool sys_itn) {
// lock for each connection
if(!tpass_online_handle){
scoped_lock guard(thread_lock);
@@ -140,7 +142,8 @@
subvector.data(), subvector.size(),
punc_cache, false, audio_fs,
wav_format, (ASR_TYPE)asr_mode_,
- hotwords_embedding, itn, decoder_handle);
+ hotwords_embedding, itn, decoder_handle,
+ svs_lang, sys_itn);
} else {
scoped_lock guard(thread_lock);
@@ -177,7 +180,8 @@
buffer.data(), buffer.size(), punc_cache,
is_final, audio_fs,
wav_format, (ASR_TYPE)asr_mode_,
- hotwords_embedding, itn, decoder_handle);
+ hotwords_embedding, itn, decoder_handle,
+ svs_lang, sys_itn);
} else {
scoped_lock guard(thread_lock);
msg["access_num"]=(int)msg["access_num"]-1;
@@ -250,6 +254,8 @@
data_msg->msg["audio_fs"] = 16000; // default is 16k
data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly
data_msg->msg["is_eof"]=false; // if this connection is closed
+ data_msg->msg["svs_lang"]="auto";
+ data_msg->msg["svs_itn"]=true;
FUNASR_DEC_HANDLE decoder_handle =
FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, global_beam_, lattice_beam_, am_scale_);
data_msg->decoder_handle = decoder_handle;
@@ -475,6 +481,12 @@
if (jsonresult.contains("itn")) {
msg_data->msg["itn"] = jsonresult["itn"];
}
+ if (jsonresult.contains("svs_lang")) {
+ msg_data->msg["svs_lang"] = jsonresult["svs_lang"];
+ }
+ if (jsonresult.contains("svs_itn")) {
+ msg_data->msg["svs_itn"] = jsonresult["svs_itn"];
+ }
LOG(INFO) << "jsonresult=" << jsonresult
<< ", msg_data->msg=" << msg_data->msg;
if ((jsonresult["is_speaking"] == false ||
@@ -499,7 +511,9 @@
msg_data->msg["audio_fs"],
msg_data->msg["wav_format"],
std::ref(msg_data->tpass_online_handle),
- std::ref(msg_data->decoder_handle)));
+ std::ref(msg_data->decoder_handle),
+ msg_data->msg["svs_lang"],
+ msg_data->msg["svs_itn"]));
msg_data->msg["access_num"]=(int)(msg_data->msg["access_num"])+1;
}
catch (std::exception const &e)
@@ -547,7 +561,9 @@
msg_data->msg["audio_fs"],
msg_data->msg["wav_format"],
std::ref(msg_data->tpass_online_handle),
- std::ref(msg_data->decoder_handle)));
+ std::ref(msg_data->decoder_handle),
+ msg_data->msg["svs_lang"],
+ msg_data->msg["svs_itn"]));
msg_data->msg["access_num"]=(int)(msg_data->msg["access_num"])+1;
}
}
--
Gitblit v1.9.1