| runtime/http/bin/asr_sessions.h | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/bin/connection.cpp | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/bin/connection.hpp | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/bin/file_parse.cpp | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/bin/file_parse.hpp | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/bin/util.hpp | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/readme.md | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/http/readme_zh.md | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
runtime/http/bin/asr_sessions.h
@@ -2,13 +2,41 @@ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights * Reserved. MIT License (https://opensource.org/licenses/MIT) */ /* 2023-2024 by zhaomingwork@qq.com */ /* 2023-2025 by zhaomingwork@qq.com */ // FUNASR_MESSAGE define the needed message between funasr engine and http server #ifndef HTTP_SERVER2_SESSIONS_HPP #define HTTP_SERVER2_SESSIONS_HPP #include "funasrruntime.h" #include "nlohmann/json.hpp" #include <iostream> #include <thread> #include <atomic> #include <mutex> #include <condition_variable> class Semaphore { public: explicit Semaphore(int count = 0) : count_(count) {} void acquire() { std::unique_lock<std::mutex> lock(mutex_); cv_.wait(lock, [this]() { return count_ > 0; }); --count_; } void release() { std::lock_guard<std::mutex> lock(mutex_); ++count_; cv_.notify_one(); } private: int count_; std::mutex mutex_; std::condition_variable cv_; }; typedef struct { nlohmann::json msg; std::shared_ptr<std::vector<char>> samples; @@ -16,5 +44,11 @@ FUNASR_DEC_HANDLE decoder_handle=nullptr; std::atomic<int> status; //std::counting_semaphore<3> sem(0); Semaphore sem_resultok; } FUNASR_MESSAGE; #endif // HTTP_SERVER2_REQUEST_PARSER_HPP runtime/http/bin/connection.cpp
@@ -2,7 +2,7 @@ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights * Reserved. MIT License (https://opensource.org/licenses/MIT) */ /* 2023-2024 by zhaomingwork@qq.com */ /* 2023-2025 by zhaomingwork@qq.com */ // // connection.cpp // copy some codes from http://www.boost.org/ @@ -10,28 +10,35 @@ #include <thread> #include <utility> #include "util.hpp" namespace http { namespace server2 { //std::ofstream fwout("out.data", std::ios::binary); std::shared_ptr<FUNASR_MESSAGE> &connection::get_data_msg() { return data_msg; } connection::connection(asio::ip::tcp::socket socket, asio::io_context &io_decoder, int connection_id, std::shared_ptr<ModelDecoder> model_decoder) : socket_(std::move(socket)), io_decoder(io_decoder), connection_id(connection_id), model_decoder(model_decoder) namespace http { s_timer = std::make_shared<asio::steady_timer>(io_decoder); } namespace server2 { std::shared_ptr<FUNASR_MESSAGE> &connection::get_data_msg() { return data_msg; } connection::connection(asio::ip::tcp::socket socket, asio::io_context &io_decoder, int connection_id, std::shared_ptr<ModelDecoder> model_decoder) : socket_(std::move(socket)), io_decoder(io_decoder), connection_id(connection_id), model_decoder(model_decoder) void connection::setup_timer() { if (data_msg->status == 1) return; { s_timer = std::make_shared<asio::steady_timer>(io_decoder); } s_timer->expires_after(std::chrono::seconds(3)); s_timer->async_wait([=](const asio::error_code &ec) { void connection::setup_timer() { if (data_msg->status == 1) return; s_timer->expires_after(std::chrono::seconds(10)); s_timer->async_wait([=](const asio::error_code &ec) { if (!ec) { std::cout << "time is out!" << std::endl; if (data_msg->status == 1) return; @@ -40,157 +47,268 @@ auto wf = std::bind(&connection::write_back, std::ref(*this), ""); // close the connection strand_->post(wf); } }); } }); } void connection::start() { std::lock_guard<std::mutex> lock(m_lock); // for threads safty try { data_msg = std::make_shared<FUNASR_MESSAGE>(); // put a new data vector for // new connection data_msg->samples = std::make_shared<std::vector<char>>(); //data_msg->samples->reserve(16000*20); data_msg->msg = nlohmann::json::parse("{}"); data_msg->msg["wav_format"] = "pcm"; data_msg->msg["wav_name"] = "wav-default-id"; data_msg->msg["itn"] = true; data_msg->msg["audio_fs"] = 16000; // default is 16k data_msg->msg["access_num"] = 0; // the number of access for this object, // when it is 0, we can free it saftly data_msg->msg["is_eof"] = false; data_msg->status = 0; void connection::start() { std::lock_guard<std::mutex> lock(m_lock); // for threads safty try { strand_ = std::make_shared<asio::io_context::strand>(io_decoder); data_msg = std::make_shared<FUNASR_MESSAGE>(); // put a new data vector for // new connection data_msg->samples = std::make_shared<std::vector<char>>(); // data_msg->samples->reserve(16000*20); data_msg->msg = nlohmann::json::parse("{}"); data_msg->msg["wav_format"] = "pcm"; data_msg->msg["wav_name"] = "wav-default-id"; data_msg->msg["itn"] = true; data_msg->msg["audio_fs"] = 16000; // default is 16k data_msg->msg["access_num"] = 0; // the number of access for this object, // when it is 0, we can free it saftly data_msg->msg["is_eof"] = false; data_msg->status = 0; FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit( model_decoder->get_asr_handle(), ASR_OFFLINE, global_beam_, lattice_beam_, am_scale_); strand_ = std::make_shared<asio::io_context::strand>(io_decoder); data_msg->decoder_handle = decoder_handle; FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit( model_decoder->get_asr_handle(), ASR_OFFLINE, global_beam_, lattice_beam_, am_scale_); if (data_msg->hotwords_embedding == nullptr) { std::unordered_map<std::string, int> merged_hws_map; std::string nn_hotwords = ""; data_msg->decoder_handle = decoder_handle; if (true) { std::string json_string = "{}"; if (!json_string.empty()) { nlohmann::json json_fst_hws; try { json_fst_hws = nlohmann::json::parse(json_string); if (json_fst_hws.type() == nlohmann::json::value_t::object) { // fst try { std::unordered_map<std::string, int> client_hws_map = json_fst_hws; merged_hws_map.insert(client_hws_map.begin(), client_hws_map.end()); } catch (const std::exception &e) { if (data_msg->hotwords_embedding == nullptr) { std::unordered_map<std::string, int> merged_hws_map; std::string nn_hotwords = ""; if (true) { std::string json_string = "{}"; if (!json_string.empty()) { nlohmann::json json_fst_hws; try { json_fst_hws = nlohmann::json::parse(json_string); if (json_fst_hws.type() == nlohmann::json::value_t::object) { // fst try { std::unordered_map<std::string, int> client_hws_map = json_fst_hws; merged_hws_map.insert(client_hws_map.begin(), client_hws_map.end()); } catch (const std::exception &e) { std::cout << e.what(); } } } catch (std::exception const &e) { std::cout << e.what(); // nn std::string client_nn_hws = "{}"; nn_hotwords += " " + client_nn_hws; std::cout << "nn hotwords: " << client_nn_hws; } } } catch (std::exception const &e) { std::cout << e.what(); // nn std::string client_nn_hws = "{}"; nn_hotwords += " " + client_nn_hws; std::cout << "nn hotwords: " << client_nn_hws; } merged_hws_map.insert(hws_map_.begin(), hws_map_.end()); // fst std::cout << "hotwords: "; for (const auto &pair : merged_hws_map) { nn_hotwords += " " + pair.first; std::cout << pair.first << " : " << pair.second; } FunWfstDecoderLoadHwsRes(data_msg->decoder_handle, fst_inc_wts_, merged_hws_map); // nn std::vector<std::vector<float>> new_hotwords_embedding = CompileHotwordEmbedding(model_decoder->get_asr_handle(), nn_hotwords); data_msg->hotwords_embedding = std::make_shared<std::vector<std::vector<float>>>( new_hotwords_embedding); } } merged_hws_map.insert(hws_map_.begin(), hws_map_.end()); // fst std::cout << "hotwords: "; for (const auto &pair : merged_hws_map) { nn_hotwords += " " + pair.first; std::cout << pair.first << " : " << pair.second; do_read(); } FunWfstDecoderLoadHwsRes(data_msg->decoder_handle, fst_inc_wts_, merged_hws_map); // nn std::vector<std::vector<float>> new_hotwords_embedding = CompileHotwordEmbedding(model_decoder->get_asr_handle(), nn_hotwords); data_msg->hotwords_embedding = std::make_shared<std::vector<std::vector<float>>>( new_hotwords_embedding); catch (const std::exception &e) { std::cout << "error:" << e.what(); } } file_parse = std::make_shared<http::server2::file_parser>(data_msg); do_read(); } catch (const std::exception &e) { std::cout << "error:" << e.what(); } } void connection::write_back(std::string str) { s_timer->cancel(); reply_ = reply::stock_reply( data_msg->msg["asr_result"].dump()); // reply::stock_reply(); do_write(); } void connection::do_read() { void connection::write_back(std::string str) { s_timer->cancel(); std::cout << "jsonresult=" << data_msg->msg["asr_result"].dump() << std::endl; reply_ = reply::stock_reply( data_msg->msg["asr_result"].dump()); // reply::stock_reply(); do_write(); } void connection::do_read() { // status==1 means time out if (data_msg->status == 1) return; if (data_msg->status == 1) return; s_timer->cancel(); setup_timer(); auto self(shared_from_this()); socket_.async_read_some( asio::buffer(buffer_), [this, self](asio::error_code ec, std::size_t bytes_transferred) { if (!ec) { auto is = std::begin(buffer_); auto ie = std::next(is, bytes_transferred); s_timer->cancel(); setup_timer(); auto self(shared_from_this()); socket_.async_read_some( asio::buffer(buffer_), [this, self](asio::error_code ec, std::size_t bytes_transferred) { if (ec) { handle_error(ec); return; } http::server2::file_parser::result_type rtype = file_parse->parse_file(is, ie); if (rtype == http::server2::file_parser::result_type::ok) { // 将新数据追加到累积缓冲区 received_data_.append(buffer_.data(), bytes_transferred); switch (state_) { case State::ReadingHeaders: //fwout.write(data_msg->samples->data(),data_msg->samples->size()); //fwout.flush(); auto wf = std::bind(&connection::write_back, std::ref(*this), "aa"); auto f = std::bind(&ModelDecoder::do_decoder, std::ref(*model_decoder), std::ref(data_msg)); if (try_parse_headers()) { if (state_ == State::SendingContinue) { // for decode task strand_->post(f); // for close task strand_->post(wf); // std::this_thread::sleep_for(std::chrono::milliseconds(1000*10)); } handle_100_continue(); } else { do_read(); } }); } handle_body(); } } else { void connection::do_write() { auto self(shared_from_this()); asio::async_write(socket_, reply_.to_buffers(), [this, self](asio::error_code ec, std::size_t) { if (!ec) { // Initiate graceful connection closure. asio::error_code ignored_ec; socket_.shutdown(asio::ip::tcp::socket::shutdown_both, ignored_ec); } do_read(); } break; // No new asynchronous operations are started. This means // that all shared_ptr references to the connection object // will disappear and the object will be destroyed // automatically after this handler returns. The // connection class's destructor closes the socket. }); } case State::ReadingBody: handle_body(); break; } // namespace server2 } // namespace http case State::SendingContinue: break; // 等待100 Continue发送完成 } }); } std::string connection::parse_attachment_filename(const std::string &header) { size_t pos = header.find("Content-Disposition: "); if (pos == std::string::npos) return ""; pos += 21; // "Content-Disposition: "长度 size_t end = header.find("\r\n", pos); if (end == std::string::npos) return ""; // 调用解析函数 return parse_attachment_filename_impl(header.substr(pos, end - pos)); } void connection::handle_body() { process_multipart_data(); if (in_file_part_ == false) { std::cout << "文件获取结束" << std::endl; std::cout << "开始解码,数据大小= " << data_msg->samples->size() << std::endl; auto close_thread = std::bind(&connection::write_back, std::ref(*this), "close"); auto decoder_thread = std::bind(&ModelDecoder::do_decoder, std::ref(*model_decoder), std::ref(data_msg)); // for decode task strand_->post(decoder_thread); // for close task strand_->post(close_thread); data_msg->sem_resultok.acquire(); std::cout << "解码线程提交结束!!!! " << std::endl; } else do_read(); return; } // 辅助函数:解析 Content-Length size_t connection::parse_content_length(const std::string &header) { size_t pos = header.find("Content-Length: "); if (pos == std::string::npos) return 0; pos += 16; // "Content-Length: "长度 size_t end = header.find("\r\n", pos); if (end == std::string::npos) return 0; try { return std::stoul(header.substr(pos, end - pos)); } catch (...) { return 0; } } void connection::handle_error(asio::error_code ec) { if (ec == asio::error::eof) { std::cout << "Connection closed gracefully\n"; } else { std::cerr << "Error: " << ec.message() << "\n"; } } void connection::do_write() { auto self(shared_from_this()); asio::async_write(socket_, reply_.to_buffers(), [this, self](asio::error_code ec, std::size_t) { if (!ec) { // Initiate graceful connection closure. asio::error_code ignored_ec; socket_.shutdown(asio::ip::tcp::socket::shutdown_both, ignored_ec); } data_msg->sem_resultok.release(); // No new asynchronous operations are started. This means // that all shared_ptr references to the connection object // will disappear and the object will be destroyed // automatically after this handler returns. The // connection class's destructor closes the socket. }); } } // namespace server2 } // namespace http runtime/http/bin/connection.hpp
@@ -2,7 +2,7 @@ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights * Reserved. MIT License (https://opensource.org/licenses/MIT) */ /* 2023-2024 by zhaomingwork@qq.com */ /* 2023-2025 by zhaomingwork@qq.com */ // // copy some codes from http://www.boost.org/ // @@ -19,86 +19,376 @@ #include "reply.hpp" #include <fstream> #include "file_parse.hpp" #include <boost/beast.hpp> #include "model-decoder.h" namespace beast = boost::beast; namespace beasthttp = beast::http; extern std::unordered_map<std::string, int> hws_map_; extern int fst_inc_wts_; extern float global_beam_, lattice_beam_, am_scale_; namespace http { namespace server2 { namespace http { namespace server2 { /// Represents a single connection from a client. class connection : public std::enable_shared_from_this<connection> { public: connection(const connection &) = delete; connection &operator=(const connection &) = delete; ~connection() { std::cout << "one connection is close()" << std::endl; }; /// Represents a single connection from a client. class connection : public std::enable_shared_from_this<connection> { public: connection(const connection &) = delete; connection &operator=(const connection &) = delete; ~connection() { std::cout << "one connection is close()" << std::endl; }; /// Construct a connection with the given socket. explicit connection(asio::ip::tcp::socket socket, asio::io_context &io_decoder, int connection_id, std::shared_ptr<ModelDecoder> model_decoder); /// Construct a connection with the given socket. explicit connection(asio::ip::tcp::socket socket, asio::io_context &io_decoder, int connection_id, std::shared_ptr<ModelDecoder> model_decoder); /// Start the first asynchronous operation for the connection. void start(); std::shared_ptr<FUNASR_MESSAGE> &get_data_msg(); void write_back(std::string str); // 处理100 Continue逻辑 void handle_100_continue() { // start_timer(5); // 5秒超时 auto self = shared_from_this(); const std::string response = "HTTP/1.1 100 Continue\r\n" "Connection: keep-alive\r\n\r\n"; asio::async_write(socket_, asio::buffer(response), [this, self](asio::error_code ec, size_t) { if (ec) return handle_error(ec); state_ = State::ReadingHeaders; do_read(); }); } // 准备文件存储 void prepare_body_handling() { if (!filename_.empty()) { sanitize_filename(filename_); output_file_.open(filename_, std::ios::binary); if (!output_file_) { std::cerr << "Failed to open: " << filename_ << "\n"; socket_.close(); } } } void finalize_request() { std::cout << "finalize_request" << std::endl; send_final_response(); } void send_final_response() { const std::string response = "HTTP/1.1 200 OK\r\n" "Content-Length: 0\r\n\r\n"; asio::write(socket_, asio::buffer(response)); socket_.close(); } void send_417_expectation_failed() { const std::string response = "HTTP/1.1 417 Expectation Failed\r\n" "Connection: close\r\n\r\n"; asio::write(socket_, asio::buffer(response)); socket_.close(); } // 安全处理文件名 static void sanitize_filename(std::string &name) { std::replace(name.begin(), name.end(), '/', '_'); std::replace(name.begin(), name.end(), '\\', '_'); name = name.substr(name.find_last_of(":") + 1); // 移除潜在路径 } // 协议版本解析 bool parse_http_version(const std::string &headers) { size_t start = headers.find("HTTP/"); if (start == std::string::npos) return false; start += 5; size_t dot = headers.find('.', start); if (dot == std::string::npos) return false; try { http_version_major_ = std::stoi(headers.substr(start, dot - start)); http_version_minor_ = std::stoi(headers.substr(dot + 1, 1)); return true; } catch (...) { return false; } } // 头部解析 bool try_parse_headers() { size_t header_end = received_data_.find("\r\n\r\n"); if (header_end == std::string::npos) { return false; } std::string headers = received_data_.substr(0, header_end); // 解析内容信息 if (content_length_ <= 0) content_length_ = parse_content_length(headers); // 解析HTTP版本 if (!parse_http_version(headers)) { return false; } // 检查Expect头 std::string continue100 = "Expect: 100-continue"; size_t pos = headers.find(continue100); expect_100_continue_ = pos != std::string::npos; // 检查协议兼容性 if (expect_100_continue_) { headers.erase(pos, continue100.length()); received_data_ = headers; state_ = State::SendingContinue; if (http_version_minor_ < 1) send_417_expectation_failed(); return true; } filename_ = parse_attachment_filename(headers); // 状态转移 std::string ext = parese_file_ext(filename_); if (filename_.find(".wav") != std::string::npos) { std::cout << "set wav_format=pcm, file_name=" << filename_ << std::endl; data_msg->msg["wav_format"] = "pcm"; } else { std::cout << "set wav_format=" << ext << ", file_name=" << filename_ << std::endl; data_msg->msg["wav_format"] = ext; } data_msg->msg["wav_name"] = filename_; /// Start the first asynchronous operation for the connection. void start(); std::shared_ptr<FUNASR_MESSAGE> &get_data_msg(); void write_back(std::string str); state_ = State::ReadingBody; return true; } private: /// Perform an asynchronous read operation. void do_read(); void parse_multipart_boundary() { size_t content_type_pos = received_data_.find("Content-Type: multipart/form-data"); if (content_type_pos == std::string::npos) return; /// Perform an asynchronous write operation. void do_write(); size_t boundary_pos = received_data_.find("boundary=", content_type_pos); if (boundary_pos == std::string::npos) return; void do_decoder(); boundary_pos += 9; // "boundary="长度 size_t boundary_end = received_data_.find("\r\n", boundary_pos); boundary_ = received_data_.substr(boundary_pos, boundary_end - boundary_pos); void setup_timer(); // 清理boundary的引号 if (boundary_.front() == '"' && boundary_.back() == '"') { boundary_ = boundary_.substr(1, boundary_.size() - 2); } } // multipart 数据处理核心 void process_multipart_data() { if (boundary_.empty()) { parse_multipart_boundary(); if (boundary_.empty()) { std::cerr << "Invalid multipart format\n"; return; } } /// Socket for the connection. asio::ip::tcp::socket socket_; while (true) { if (!in_file_part_) { // 查找boundary起始 size_t boundary_pos = received_data_.find("--" + boundary_); if (boundary_pos == std::string::npos) break; // 移动到part头部 size_t part_start = received_data_.find("\r\n\r\n", boundary_pos); if (part_start == std::string::npos) break; part_start += 4; // 跳过空行 parse_part_headers(received_data_.substr(boundary_pos, part_start - boundary_pos)); received_data_.erase(0, part_start); in_file_part_ = true; } else { // 查找boundary结束 size_t boundary_end = received_data_.find("\r\n--" + boundary_); if (boundary_end == std::string::npos) break; // 写入内容 std::string tmpstr = received_data_.substr(0, boundary_end); data_msg->samples->insert(data_msg->samples->end(), tmpstr.begin(), tmpstr.end()); received_data_.erase(0, boundary_end + 2); // 保留\r\n供下次解析 in_file_part_ = false; } } } std::string parese_file_ext(std::string file_name) { int pos = file_name.rfind('.'); std::string ext = ""; if (pos != std::string::npos) ext = file_name.substr(pos + 1); return ext; } // 解析part头部信息 void parse_part_headers(const std::string &headers) { current_part_filename_.clear(); expected_part_size_ = 0; // 解析文件名 size_t filename_pos = headers.find("filename=\""); if (filename_pos != std::string::npos) { filename_pos += 10; size_t filename_end = headers.find('"', filename_pos); current_part_filename_ = headers.substr(filename_pos, filename_end - filename_pos); sanitize_filename(current_part_filename_); } // 解析Content-Length size_t cl_pos = headers.find("Content-Length: "); if (cl_pos != std::string::npos) { cl_pos += 15; size_t cl_end = headers.find("\r\n", cl_pos); expected_part_size_ = std::stoull(headers.substr(cl_pos, cl_end - cl_pos)); } } private: /// Perform an asynchronous read operation. void do_read(); void handle_body(); std::string parse_attachment_filename(const std::string &header); size_t parse_content_length(const std::string &header); void handle_error(asio::error_code ec); /// Perform an asynchronous write operation. void do_write(); void do_decoder(); void setup_timer(); /// Socket for the connection. asio::ip::tcp::socket socket_; /// Buffer for incoming data. std::array<char, 8192> buffer_; /// for time out std::shared_ptr<asio::steady_timer> s_timer; std::shared_ptr<ModelDecoder> model_decoder; int connection_id = 0; /// The reply to be sent back to the client. reply reply_; asio::io_context &io_decoder; std::shared_ptr<FUNASR_MESSAGE> data_msg; std::mutex m_lock; std::shared_ptr<asio::io_context::strand> strand_; /// Buffer for incoming data. std::array<char, 8192> buffer_; /// for time out std::shared_ptr<asio::steady_timer> s_timer; beasthttp::response_parser<beasthttp::string_body> parser_; // 渐进式解析器 std::string received_data_; // 累积接收的数据 bool header_parsed_ = false; // 头部解析状态标记 size_t content_length_ = 0; // Content-Length 值 enum class State { ReadingHeaders, SendingContinue, ReadingBody }; bool expect_100_continue_ = false; State state_ = State::ReadingHeaders; std::string filename_; std::ofstream output_file_; int http_version_major_ = 1; int http_version_minor_ = 1; std::string boundary_ = ""; bool in_file_part_ = false; std::string current_part_filename_; size_t expected_part_size_ = 0; }; std::shared_ptr<ModelDecoder> model_decoder; typedef std::shared_ptr<connection> connection_ptr; } // namespace server2 } // namespace http int connection_id = 0; /// The reply to be sent back to the client. reply reply_; asio::io_context &io_decoder; std::shared_ptr<FUNASR_MESSAGE> data_msg; std::mutex m_lock; std::shared_ptr<asio::io_context::strand> strand_; std::shared_ptr<http::server2::file_parser> file_parse; }; typedef std::shared_ptr<connection> connection_ptr; } // namespace server2 } // namespace http #endif // HTTP_SERVER2_CONNECTION_HPP #endif // HTTP_SERVER2_CONNECTION_HPP runtime/http/bin/file_parse.cpp
File was deleted runtime/http/bin/file_parse.hpp
File was deleted runtime/http/bin/util.hpp
New file @@ -0,0 +1,95 @@ #include <string> #include <vector> #include <sstream> #include <cctype> #include <cstdio> #include <algorithm> // 辅助函数:修剪字符串两端的空白 std::string trim(const std::string& s) { auto start = s.find_first_not_of(" \t"); if (start == std::string::npos) return ""; auto end = s.find_last_not_of(" \t"); return s.substr(start, end - start + 1); } // 辅助函数:检查字符串是否以某个前缀开头 bool starts_with(const std::string& s, const std::string& prefix) { return s.size() >= prefix.size() && s.compare(0, prefix.size(), prefix) == 0; } // 辅助函数:分割字符串 std::vector<std::string> split(const std::string& s, char delimiter) { std::vector<std::string> tokens; std::string token; std::istringstream token_stream(s); while (std::getline(token_stream, token, delimiter)) { if (!token.empty()) { tokens.push_back(token); } } return tokens; } // URL 解码(简易实现) std::string url_decode(const std::string& str) { std::string result; for (size_t i = 0; i < str.size(); ++i) { if (str[i] == '%' && i + 2 < str.size()) { int hex_val; if (sscanf(str.substr(i + 1, 2).c_str(), "%x", &hex_val) == 1) { result += static_cast<char>(hex_val); i += 2; } else { result += str[i]; } } else if (str[i] == '+') { result += ' '; } else { result += str[i]; } } return result; } // 解析 RFC 5987 编码(如 filename*=UTF-8''%C2%A3.txt) std::string decode_rfc5987(const std::string& value) { size_t pos = value.find("''"); if (pos != std::string::npos) { std::string encoded = value.substr(pos + 2); return url_decode(encoded); } return value; } // 主解析函数 std::string parse_attachment_filename_impl(const std::string& content_disp) { std::vector<std::string> parts = split(content_disp, ';'); std::string filename; for (auto& part : parts) { std::string trimmed = trim(part); // 优先处理 RFC 5987 编码的 filename* if (starts_with(trimmed, "filename*=")) { std::string value = trimmed.substr(10); if (value.size() >= 2 && value.front() == '"' && value.back() == '"') { value = value.substr(1, value.size() - 2); } return decode_rfc5987(value); } // 其次处理普通 filename else if (starts_with(trimmed, "filename=")) { std::string value = trimmed.substr(9); if (value.size() >= 2 && value.front() == '"' && value.back() == '"') { value = value.substr(1, value.size() - 2); } filename = value; } } return filename; } runtime/http/readme.md
@@ -18,6 +18,9 @@ ### Install deps ```shell # need to install boost lib apt install libboost-dev libboost-system-dev #ubuntu # openblas sudo apt-get install libopenblas-dev #ubuntu # sudo yum -y install openblas-devel #centos @@ -44,15 +47,6 @@ ### run ```shell ./funasr-http-server \ --lm-dir '' \ --itn-dir '' \ --download-model-dir ${download_model_dir} \ --model-dir ${model_dir} \ --vad-dir ${vad_dir} \ --punc-dir ${punc_dir} \ --decoder-thread-num ${decoder_thread_num} \ --io-thread-num ${io_thread_num} \ --port ${port} \ ./funasr-http-server --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx --itn-dir '' --lm-dir '' --port 10001 ``` runtime/http/readme_zh.md
@@ -18,6 +18,8 @@ ### 安装依赖 ```shell # need to install boost lib apt install libboost-dev libboost-system-dev #ubuntu # openblas sudo apt-get install libopenblas-dev #ubuntu # sudo yum -y install openblas-devel #centos @@ -45,16 +47,7 @@ ### 运行 ```shell ./funasr-http-server \ --lm-dir '' \ --itn-dir '' \ --download-model-dir ${download_model_dir} \ --model-dir ${model_dir} \ --vad-dir ${vad_dir} \ --punc-dir ${punc_dir} \ --decoder-thread-num ${decoder_thread_num} \ --io-thread-num ${io_thread_num} \ --port ${port} \ ./funasr-http-server --vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx --model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx --itn-dir '' --lm-dir '' --port 10001 ```