| | |
| | | /** |
| | | * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | * MIT License (https://opensource.org/licenses/MIT) |
| | | */ |
| | | |
#ifndef _WIN32
#include <sys/time.h>
#else
#include <win_func.h>
#endif

#include "libfunasrapi.h"
#include <glog/logging.h>
#include "funasrruntime.h"
#include "tclap/CmdLine.h"
#include "com-define.h"

#include <atomic>
#include <fstream>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
| | | |
| | | using namespace std; |
| | | |
// Shared cursor into the wav list: each worker claims the next file to decode
// with wav_index.fetch_add(1), so no file is processed twice.
// (The former global named `index` is dropped: it duplicated this counter and
// its name is also used by a libc string function.)
std::atomic<int> wav_index(0);
// Guards the totals that the worker threads fold their per-thread results into.
std::mutex mtx;
| | | |
| | | void runReg(FUNASR_HANDLE AsrHandle, vector<string> wav_list, |
| | | void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, |
| | | float* total_length, long* total_time, int core_id) { |
| | | |
| | | // cpu_set_t cpuset; |
| | | // CPU_ZERO(&cpuset); |
| | | // CPU_SET(core_id, &cpuset); |
| | | // if(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) < 0){ |
| | | // perror("pthread_setaffinity_np"); |
| | | // } |
| | | |
| | | struct timeval start, end; |
| | | long seconds = 0; |
| | |
| | | // warm up |
| | | for (size_t i = 0; i < 1; i++) |
| | | { |
| | | FUNASR_RESULT Result=FunASRRecogFile(AsrHandle, wav_list[0].c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, 16000); |
| | | } |
| | | |
| | | while (true) { |
| | | // 使用原子变量获取索引并递增 |
| | | int i = index.fetch_add(1); |
| | | int i = wav_index.fetch_add(1); |
| | | if (i >= wav_list.size()) { |
| | | break; |
| | | } |
| | | |
| | | gettimeofday(&start, NULL); |
| | | FUNASR_RESULT Result=FunASRRecogFile(AsrHandle, wav_list[i].c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, 16000); |
| | | |
| | | gettimeofday(&end, NULL); |
| | | seconds = (end.tv_sec - start.tv_sec); |
| | | long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | | n_total_time += taking_micros; |
| | | |
| | | if(Result){ |
| | | string msg = FunASRGetResult(Result, 0); |
| | | printf("Thread: %d Result: %s \n", this_thread::get_id(), msg.c_str()); |
| | | if(result){ |
| | | string msg = FunASRGetResult(result, 0); |
| | | LOG(INFO) << "Thread: " << this_thread::get_id() <<" Result: " << msg.c_str(); |
| | | |
| | | float snippet_time = FunASRGetRetSnippetTime(Result); |
| | | float snippet_time = FunASRGetRetSnippetTime(result); |
| | | n_total_length += snippet_time; |
| | | FunASRFreeResult(Result); |
| | | FunASRFreeResult(result); |
| | | }else{ |
| | | cout <<"No return data!"; |
| | | LOG(ERROR) << ("No return data!\n"); |
| | | } |
| | | |
| | | } |
| | | { |
| | | lock_guard<mutex> guard(mtx); |
| | |
| | | } |
| | | } |
| | | |
/**
 * Reports whether the text after the last '.' in `filename` equals `target`.
 *
 * The comparison is exact (case-sensitive). A filename without any '.' never
 * matches.
 *
 * @param filename Path or file name to inspect.
 * @param target   Extension to match, without the leading dot (e.g. "wav").
 * @return true if the extension equals `target`, false otherwise.
 */
bool is_target_file(const std::string& filename, const std::string& target) {
    const std::size_t dot = filename.find_last_of('.');
    if (dot == std::string::npos) {
        return false;
    }
    // Compare the tail in place instead of materialising a substring.
    return filename.compare(dot + 1, std::string::npos, target) == 0;
}
| | | |
| | | void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path) |
| | | { |
| | | if (value_arg.isSet()){ |
| | | model_path.insert({key, value_arg.getValue()}); |
| | | LOG(INFO)<< key << " : " << value_arg.getValue(); |
| | | } |
| | | } |
| | | |
| | | int main(int argc, char *argv[]) |
| | | { |
| | | google::InitGoogleLogging(argv[0]); |
| | | FLAGS_logtostderr = true; |
| | | |
| | | if (argc < 5) |
| | | { |
| | | printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) nThreadNum \n", argv[0]); |
| | | exit(-1); |
| | | } |
| | | TCLAP::CmdLine cmd("funasr-onnx-offline-rtf", ' ', "1.0"); |
| | | TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string"); |
| | | TCLAP::ValueArg<std::string> vad_dir("", VAD_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> vad_quant("", VAD_QUANT, "false (Default), load the model of model.onnx in vad_dir. If set true, load the model of model_quant.onnx in vad_dir", false, "false", "string"); |
| | | TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc model path, which contains model.onnx, punc.yaml", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "false", "string"); |
| | | |
| | | // read wav.scp |
| | | vector<string> wav_list; |
| | | ifstream in(argv[2]); |
| | | if (!in.is_open()) { |
| | | printf("Failed to open file: %s", argv[2]); |
| | | return 0; |
| | | } |
| | | string line; |
| | | while(getline(in, line)) |
| | | { |
| | | istringstream iss(line); |
| | | string column1, column2; |
| | | iss >> column1 >> column2; |
| | | wav_list.push_back(column2); |
| | | } |
| | | in.close(); |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string"); |
| | | TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t"); |
| | | |
| | | // model init |
| | | cmd.add(model_dir); |
| | | cmd.add(quantize); |
| | | cmd.add(vad_dir); |
| | | cmd.add(vad_quant); |
| | | cmd.add(punc_dir); |
| | | cmd.add(punc_quant); |
| | | cmd.add(wav_path); |
| | | cmd.add(thread_num); |
| | | cmd.parse(argc, argv); |
| | | |
| | | std::map<std::string, std::string> model_path; |
| | | GetValue(model_dir, MODEL_DIR, model_path); |
| | | GetValue(quantize, QUANTIZE, model_path); |
| | | GetValue(vad_dir, VAD_DIR, model_path); |
| | | GetValue(vad_quant, VAD_QUANT, model_path); |
| | | GetValue(punc_dir, PUNC_DIR, model_path); |
| | | GetValue(punc_quant, PUNC_QUANT, model_path); |
| | | GetValue(wav_path, WAV_PATH, model_path); |
| | | |
| | | struct timeval start, end; |
| | | gettimeofday(&start, NULL); |
| | | // is quantize |
| | | bool quantize = false; |
| | | istringstream(argv[3]) >> boolalpha >> quantize; |
| | | // thread num |
| | | int nThreadNum = 1; |
| | | nThreadNum = atoi(argv[4]); |
| | | FUNASR_HANDLE asr_handle=FunOfflineInit(model_path, 1); |
| | | |
| | | FUNASR_HANDLE AsrHandle=FunASRInit(argv[1], 1, quantize); |
| | | if (!AsrHandle) |
| | | if (!asr_handle) |
| | | { |
| | | printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]); |
| | | LOG(ERROR) << "FunASR init failed"; |
| | | exit(-1); |
| | | } |
| | | |
| | | gettimeofday(&end, NULL); |
| | | long seconds = (end.tv_sec - start.tv_sec); |
| | | long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | | printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000); |
| | | LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s"; |
| | | |
| | | // read wav_path |
| | | vector<string> wav_list; |
| | | string wav_path_ = model_path.at(WAV_PATH); |
| | | if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){ |
| | | wav_list.emplace_back(wav_path_); |
| | | } |
| | | else if(is_target_file(wav_path_, "scp")){ |
| | | ifstream in(wav_path_); |
| | | if (!in.is_open()) { |
| | | LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ; |
| | | return 0; |
| | | } |
| | | string line; |
| | | while(getline(in, line)) |
| | | { |
| | | istringstream iss(line); |
| | | string column1, column2; |
| | | iss >> column1 >> column2; |
| | | wav_list.emplace_back(column2); |
| | | } |
| | | in.close(); |
| | | }else{ |
| | | LOG(ERROR)<<"Please check the wav extension!"; |
| | | exit(-1); |
| | | } |
| | | |
| | | // 多线程测试 |
| | | float total_length = 0.0f; |
| | | long total_time = 0; |
| | | std::vector<std::thread> threads; |
| | | |
| | | for (int i = 0; i < nThreadNum; i++) |
| | | int rtf_threds = thread_num.getValue(); |
| | | for (int i = 0; i < rtf_threds; i++) |
| | | { |
| | | threads.emplace_back(thread(runReg, AsrHandle, wav_list, &total_length, &total_time, i)); |
| | | threads.emplace_back(thread(runReg, asr_handle, wav_list, &total_length, &total_time, i)); |
| | | } |
| | | |
| | | for (auto& thread : threads) |
| | |
| | | thread.join(); |
| | | } |
| | | |
| | | printf("total_time_wav %ld ms.\n", (long)(total_length * 1000)); |
| | | printf("total_time_comput %ld ms.\n", total_time / 1000); |
| | | printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000)); |
| | | printf("speedup %05lf .\n", 1.0/((double)total_time/ (total_length*1000000))); |
| | | LOG(INFO) << "total_time_wav " << (long)(total_length * 1000) << " ms"; |
| | | LOG(INFO) << "total_time_comput " << total_time / 1000 << " ms"; |
| | | LOG(INFO) << "total_rtf " << (double)total_time/ (total_length*1000000); |
| | | LOG(INFO) << "speedup " << 1.0/((double)total_time/ (total_length*1000000)); |
| | | |
| | | FunASRUninit(AsrHandle); |
| | | FunASRUninit(asr_handle); |
| | | return 0; |
| | | } |