From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] 提交: add --audio-fs and --svs-itn options to funasr-wss-client-2pass
---
runtime/websocket/bin/funasr-wss-client-2pass.cpp | 57 ++++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 40 insertions(+), 17 deletions(-)
diff --git a/runtime/websocket/bin/funasr-wss-client-2pass.cpp b/runtime/websocket/bin/funasr-wss-client-2pass.cpp
index e2cce28..e8bbfc1 100644
--- a/runtime/websocket/bin/funasr-wss-client-2pass.cpp
+++ b/runtime/websocket/bin/funasr-wss-client-2pass.cpp
@@ -122,9 +122,9 @@
// This method will block until the connection is complete
void run(const std::string& uri, const std::vector<string>& wav_list,
- const std::vector<string>& wav_ids, std::string asr_mode,
+ const std::vector<string>& wav_ids, int audio_fs, std::string asr_mode,
std::vector<int> chunk_size, const std::unordered_map<std::string, int>& hws_map,
- bool is_record=false, int use_itn=1) {
+ bool is_record=false, int use_itn=1, int svs_itn=1) {
// Create a new connection to the given URI
websocketpp::lib::error_code ec;
typename websocketpp::client<T>::connection_ptr con =
@@ -146,9 +146,9 @@
websocketpp::lib::thread asio_thread(&websocketpp::client<T>::run,
&m_client);
if(is_record){
- send_rec_data(asr_mode, chunk_size, hws_map, use_itn);
+ send_rec_data(asr_mode, chunk_size, hws_map, use_itn, svs_itn);
}else{
- send_wav_data(wav_list[0], wav_ids[0], asr_mode, chunk_size, hws_map, use_itn);
+ send_wav_data(wav_list[0], wav_ids[0], audio_fs, asr_mode, chunk_size, hws_map, use_itn, svs_itn);
}
WaitABit();
@@ -183,20 +183,20 @@
m_done = true;
}
// send wav to server
- void send_wav_data(string wav_path, string wav_id, std::string asr_mode,
+ void send_wav_data(string wav_path, string wav_id, int audio_fs, std::string asr_mode,
std::vector<int> chunk_vector, const std::unordered_map<std::string, int>& hws_map,
- int use_itn) {
+ int use_itn, int svs_itn) {
uint64_t count = 0;
std::stringstream val;
funasr::Audio audio(1);
- int32_t sampling_rate = 16000;
+ int32_t sampling_rate = audio_fs;
std::string wav_format = "pcm";
if (funasr::IsTargetFile(wav_path.c_str(), "wav")) {
- int32_t sampling_rate = -1;
- if (!audio.LoadWav(wav_path.c_str(), &sampling_rate)) return;
+ if (!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
+ return;
} else if (funasr::IsTargetFile(wav_path.c_str(), "pcm")) {
- if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate)) return;
+ if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false)) return;
} else {
wav_format = "others";
if (!audio.LoadOthers2Char(wav_path.c_str())) return;
@@ -238,10 +238,15 @@
jsonbegin["chunk_size"] = chunk_size;
jsonbegin["wav_name"] = wav_id;
jsonbegin["wav_format"] = wav_format;
+ jsonbegin["audio_fs"] = sampling_rate;
jsonbegin["is_speaking"] = true;
jsonbegin["itn"] = true;
+ jsonbegin["svs_itn"] = true;
if(use_itn == 0){
jsonbegin["itn"] = false;
+ }
+ if(svs_itn == 0){
+ jsonbegin["svs_itn"] = false;
}
if(!hws_map.empty()){
LOG(INFO) << "hotwords: ";
@@ -334,7 +339,7 @@
}
void send_rec_data(std::string asr_mode, std::vector<int> chunk_vector,
- const std::unordered_map<std::string, int>& hws_map, int use_itn) {
+ const std::unordered_map<std::string, int>& hws_map, int use_itn, int svs_itn) {
// first message
bool wait = false;
while (1) {
@@ -360,6 +365,7 @@
}
websocketpp::lib::error_code ec;
+ float sample_rate = 16000;
nlohmann::json jsonbegin;
nlohmann::json chunk_size = nlohmann::json::array();
chunk_size.push_back(chunk_vector[0]);
@@ -369,10 +375,15 @@
jsonbegin["chunk_size"] = chunk_size;
jsonbegin["wav_name"] = "record";
jsonbegin["wav_format"] = "pcm";
+ jsonbegin["audio_fs"] = sample_rate;
jsonbegin["is_speaking"] = true;
jsonbegin["itn"] = true;
+ jsonbegin["svs_itn"] = true;
if(use_itn == 0){
jsonbegin["itn"] = false;
+ }
+ if(svs_itn == 0){
+ jsonbegin["svs_itn"] = false;
}
if(!hws_map.empty()){
LOG(INFO) << "hotwords: ";
@@ -408,7 +419,6 @@
param.suggestedLatency = info->defaultLowInputLatency;
param.hostApiSpecificStreamInfo = nullptr;
- float sample_rate = 16000;
PaStream *stream;
std::vector<float> buffer;
@@ -447,6 +457,7 @@
m_client.get_alog().write(websocketpp::log::alevel::app,
"Send Error: " + ec.message());
}
+ delete[] iArray;
Pa_Sleep(20); // sleep for 20ms
}
@@ -473,6 +484,10 @@
};
int main(int argc, char* argv[]) {
+#ifdef _WIN32
+ #include <windows.h>
+ SetConsoleOutputCP(65001);
+#endif
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
@@ -486,6 +501,7 @@
"the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: "
"asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)",
false, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs_("", "audio-fs", "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<int> record_(
"", "record",
"record is 1 means use record", false, 0,
@@ -505,18 +521,23 @@
"", "use-itn",
"use-itn is 1 means use itn, 0 means not use itn", false, 1,
"int");
+ TCLAP::ValueArg<int> svs_itn_(
+ "", "svs-itn",
+ "svs-itn is 1 means use itn and punc, 0 means not use", false, 1, "int");
TCLAP::ValueArg<std::string> hotword_("", HOTWORD,
"the hotword file, one hotword perline, Format: Hotword Weight (could be: 闃块噷宸村反 20)", false, "", "string");
cmd.add(server_ip_);
cmd.add(port_);
cmd.add(wav_path_);
+ cmd.add(audio_fs_);
cmd.add(asr_mode_);
cmd.add(record_);
cmd.add(chunk_size_);
cmd.add(thread_num_);
cmd.add(is_ssl_);
cmd.add(use_itn_);
+ cmd.add(svs_itn_);
cmd.add(hotword_);
cmd.parse(argc, argv);
@@ -526,6 +547,7 @@
std::string asr_mode = asr_mode_.getValue();
std::string chunk_size_str = chunk_size_.getValue();
int use_itn = use_itn_.getValue();
+ int svs_itn = svs_itn_.getValue();
// get chunk_size
std::vector<int> chunk_size;
std::stringstream ss(chunk_size_str);
@@ -558,6 +580,7 @@
funasr::ExtractHws(hotword_path, hws_map);
}
+ int audio_fs = audio_fs_.getValue();
if(is_record == 1){
std::vector<string> tmp_wav_list;
std::vector<string> tmp_wav_ids;
@@ -567,11 +590,11 @@
c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, true, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, true, use_itn, svs_itn);
} else {
WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, true, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, true, use_itn, svs_itn);
}
}else{
@@ -612,17 +635,17 @@
tmp_wav_ids.emplace_back(wav_ids[wav_i + i]);
client_threads.emplace_back(
- [uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, is_ssl, hws_map, use_itn]() {
+ [uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, is_ssl, hws_map, use_itn, svs_itn]() {
if (is_ssl == 1) {
WebsocketClient<websocketpp::config::asio_tls_client> c(is_ssl);
c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, false, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, false, use_itn, svs_itn);
} else {
WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, false, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, false, use_itn, svs_itn);
}
});
}
--
Gitblit v1.9.1