From f72914003a8c4ab7ae72d52dbd7c5f70ea22313a Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期一, 18 十二月 2023 17:33:24 +0800
Subject: [PATCH] add sentence timestamp
---
runtime/websocket/bin/websocket-server-2pass.cpp | 25 +++++++++++++++++++------
1 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp
index bd0c590..44dd82e 100644
--- a/runtime/websocket/bin/websocket-server-2pass.cpp
+++ b/runtime/websocket/bin/websocket-server-2pass.cpp
@@ -80,6 +80,12 @@
jsonresult["timestamp"] = tmp_stamp_msg;
}
+ std::string tmp_stamp_sents = FunASRGetStampSents(result);
+ if (tmp_stamp_sents != "") {
+ LOG(INFO) << "offline stamp_sents : " << tmp_stamp_sents;
+ jsonresult["stamp_sents"] = tmp_stamp_sents;
+ }
+
return jsonresult;
}
// feed buffer to asr engine for decoder
@@ -98,8 +104,8 @@
std::string wav_format,
FUNASR_HANDLE& tpass_online_handle) {
// lock for each connection
- scoped_lock guard(thread_lock);
if(!tpass_online_handle){
+ scoped_lock guard(thread_lock);
LOG(INFO) << "tpass_online_handle is free, return";
msg["access_num"]=(int)msg["access_num"]-1;
return;
@@ -128,10 +134,12 @@
hotwords_embedding, itn);
} else {
+ scoped_lock guard(thread_lock);
msg["access_num"]=(int)msg["access_num"]-1;
return;
}
} catch (std::exception const& e) {
+ scoped_lock guard(thread_lock);
LOG(ERROR) << e.what();
msg["access_num"]=(int)msg["access_num"]-1;
return;
@@ -162,10 +170,12 @@
wav_format, (ASR_TYPE)asr_mode_,
hotwords_embedding, itn);
} else {
+ scoped_lock guard(thread_lock);
msg["access_num"]=(int)msg["access_num"]-1;
return;
}
} catch (std::exception const& e) {
+ scoped_lock guard(thread_lock);
LOG(ERROR) << e.what();
msg["access_num"]=(int)msg["access_num"]-1;
return;
@@ -209,6 +219,7 @@
} catch (std::exception const& e) {
std::cerr << "Error: " << e.what() << std::endl;
}
+ scoped_lock guard(thread_lock);
msg["access_num"]=(int)msg["access_num"]-1;
}
@@ -227,7 +238,7 @@
data_msg->msg["wav_name"] = "wav-default-id";
data_msg->msg["mode"] = "2pass";
data_msg->msg["itn"] = true;
- data_msg->msg["audio_fs"] = 16000;
+ data_msg->msg["audio_fs"] = 16000; // default is 16k
data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly
data_msg->msg["is_eof"]=false; // if this connection is closed
data_msg->punc_cache =
@@ -313,7 +324,7 @@
data_msg->msg["is_eof"]=true;
guard_decoder.unlock();
to_remove.push_back(hdl);
- LOG(INFO)<<"connection is closed: "<<e.what();
+ LOG(INFO)<<"connection is closed.";
}
iter++;
@@ -452,7 +463,9 @@
LOG(INFO) << "jsonresult=" << jsonresult
<< ", msg_data->msg=" << msg_data->msg;
if ((jsonresult["is_speaking"] == false ||
- jsonresult["is_finished"] == true) && msg_data->msg["is_eof"] != true) {
+ jsonresult["is_finished"] == true) &&
+ msg_data->msg["is_eof"] != true &&
+ msg_data->hotwords_embedding != NULL) {
LOG(INFO) << "client done";
// if it is in final message, post the sample_data to decode
@@ -503,9 +516,9 @@
try{
// post to decode
- if (msg_data->msg["is_eof"] != true) {
+ if (msg_data->msg["is_eof"] != true && msg_data->hotwords_embedding != NULL) {
std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
- msg_data->strand_->post(
+ msg_data->strand_->post(
std::bind(&WebSocketServer::do_decoder, this,
std::move(subvector), std::move(hdl),
std::ref(msg_data->msg),
--
Gitblit v1.9.1