funasr/runtime/onnxruntime/CMakeLists.txt
@@ -30,4 +30,3 @@ add_subdirectory("./third_party/yaml-cpp") add_subdirectory(kaldi-native-fbank/kaldi-native-fbank/csrc) add_subdirectory(src) add_subdirectory(tester) funasr/runtime/onnxruntime/include/Audio.h
@@ -2,14 +2,10 @@ #ifndef AUDIO_H #define AUDIO_H #include <ComDefine.h> #include <queue> #include <stdint.h> #include "Model.h" #ifndef model_sample_rate #define model_sample_rate 16000 #endif #ifndef WAV_HEADER_SIZE #define WAV_HEADER_SIZE 44 #endif funasr/runtime/onnxruntime/include/ComDefine.h
@@ -8,4 +8,21 @@ #define S_ALL 3 #define S_ERR 4 #ifndef MODEL_SAMPLE_RATE #define MODEL_SAMPLE_RATE 16000 #endif #ifndef VAD_SILENCE_DYRATION #define VAD_SILENCE_DYRATION 15000 #endif #ifndef VAD_MAX_LEN #define VAD_MAX_LEN 800 #endif #ifndef VAD_SPEECH_NOISE_THRES #define VAD_SPEECH_NOISE_THRES 0.9 #endif #endif funasr/runtime/onnxruntime/src/Audio.cpp
@@ -187,13 +187,13 @@ void Audio::disp() { printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate, printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE, speech_len); } float Audio::get_time_len() { return (float)speech_len / model_sample_rate; return (float)speech_len / MODEL_SAMPLE_RATE; } void Audio::wavResample(int32_t sampling_rate, const float *waveform, @@ -203,9 +203,9 @@ "Creating a resampler:\n" " in_sample_rate: %d\n" " output_sample_rate: %d\n", sampling_rate, static_cast<int32_t>(model_sample_rate)); sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE)); float min_freq = std::min<int32_t>(sampling_rate, model_sample_rate); std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE); float lowpass_cutoff = 0.99 * 0.5 * min_freq; int32_t lowpass_filter_width = 6; @@ -213,7 +213,7 @@ //auto resampler = new LinearResample( // sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width); auto resampler = std::make_unique<LinearResample>( sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width); sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width); std::vector<float> samples; resampler->Resample(waveform, n, true, &samples); //reset speech_data @@ -270,7 +270,7 @@ } //resample if(*sampling_rate != model_sample_rate){ if(*sampling_rate != MODEL_SAMPLE_RATE){ wavResample(*sampling_rate, speech_data, speech_len); } @@ -317,7 +317,7 @@ } //resample if(*sampling_rate != model_sample_rate){ if(*sampling_rate != MODEL_SAMPLE_RATE){ wavResample(*sampling_rate, speech_data, speech_len); } @@ -360,7 +360,7 @@ } //resample if(*sampling_rate != model_sample_rate){ if(*sampling_rate != MODEL_SAMPLE_RATE){ wavResample(*sampling_rate, speech_data, speech_len); } @@ -411,7 +411,7 @@ } //resample if(*sampling_rate != model_sample_rate){ if(*sampling_rate != MODEL_SAMPLE_RATE){ wavResample(*sampling_rate, speech_data, speech_len); } @@ -511,7 +511,7 @@ std::vector<float> pcm_data(speech_data, speech_data+sp_len); vector<std::vector<int>> vad_segments = pRecogObj->vad_seg(pcm_data); int seg_sample = model_sample_rate/1000; int seg_sample = MODEL_SAMPLE_RATE/1000; for(vector<int> segment:vad_segments) { frame = new AudioFrame(); funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -1,25 +1,22 @@ file(GLOB files1 "*.cpp") file(GLOB files2 "*.cc") file(GLOB files4 "paraformer/*.cpp") set(files ${files1} ${files2} ${files3} ${files4}) # message("${files}") set(files ${files1} ${files2}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) add_library(funasr ${files}) if(WIN32) set(EXTRA_LIBS pthread yaml-cpp csrc) if(CMAKE_CL_64) target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64) else() target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86) endif() target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ) target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT) set(EXTRA_LIBS pthread yaml-cpp csrc) if(CMAKE_CL_64) target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64) else() target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86) endif() target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ) target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT) else() set(EXTRA_LIBS pthread yaml-cpp csrc) @@ -38,4 +35,8 @@ include_directories(${CMAKE_SOURCE_DIR}/include) target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS}) add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp") add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp") target_link_libraries(funasr-onnx-offline PUBLIC funasr) target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr) funasr/runtime/onnxruntime/src/FeatureQueue.cpp
File was deleted funasr/runtime/onnxruntime/src/FeatureQueue.h
File was deleted funasr/runtime/onnxruntime/src/SpeechWrap.cpp
File was deleted funasr/runtime/onnxruntime/src/SpeechWrap.h
File was deleted funasr/runtime/onnxruntime/src/commonfunc.h
@@ -1,6 +1,5 @@ #pragma once typedef struct { std::string msg; @@ -10,8 +9,6 @@ #ifdef _WIN32 #include <codecvt> inline std::wstring string2wstring(const std::string& str, const std::string& locale) { @@ -28,8 +25,6 @@ } #endif inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) { size_t numInputNodes = session->GetInputCount(); funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
File was renamed from funasr/runtime/onnxruntime/tester/tester.cpp @@ -6,9 +6,6 @@ #endif #include "libfunasrapi.h" #include <iostream> #include <fstream> #include <sstream> using namespace std; @@ -41,12 +38,10 @@ printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000); gettimeofday(&start, NULL); float snippet_time = 0.0f; FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL, use_vad); gettimeofday(&end, NULL); float snippet_time = 0.0f; if (Result) { string msg = FunASRGetResult(Result, 0); @@ -57,7 +52,7 @@ } else { cout <<"no return data!"; printf("no return data!"); } printf("Audio length %lfs.\n", (double)snippet_time); funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@@ -14,7 +14,7 @@ string vad_path = pathAppend(path, "vad_model.onnx"); string mvn_path = pathAppend(path, "vad.mvn"); vadHandle = make_unique<FsmnVad>(); vadHandle->init_vad(vad_path, mvn_path, model_sample_rate, 800, 15000, 0.9); vadHandle->init_vad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES); } if(quantize) @@ -29,7 +29,7 @@ // knf options fbank_opts.frame_opts.dither = 0; fbank_opts.mel_opts.num_bins = 80; fbank_opts.frame_opts.samp_freq = model_sample_rate; fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE; fbank_opts.frame_opts.window_type = "hamming"; fbank_opts.frame_opts.frame_shift_ms = 10; fbank_opts.frame_opts.frame_length_ms = 25; @@ -191,7 +191,7 @@ { int32_t in_feat_dim = fbank_opts.mel_opts.num_bins; std::vector<float> wav_feats = FbankKaldi(model_sample_rate, din, len); std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len); wav_feats = ApplyLFR(wav_feats); ApplyCMVN(&wav_feats); funasr/runtime/onnxruntime/src/precomp.h
@@ -1,6 +1,5 @@ #pragma once // system #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -16,8 +15,6 @@ #include <string> #include <math.h> #include <numeric> #include <cstring> using namespace std; @@ -27,27 +24,19 @@ #include "kaldi-native-fbank/csrc/feature-fbank.h" #include "kaldi-native-fbank/csrc/online-feature.h" // mine #include "ComDefine.h" #include "commonfunc.h" #include <ComDefine.h> #include "predefine_coe.h" #include "FsmnVad.h" #include <ComDefine.h> //#include "alignedmem.h" #include "Vocab.h" #include "CommonStruct.h" #include "Audio.h" #include "Tensor.h" #include "util.h" #include "CommonStruct.h" #include "FeatureQueue.h" #include "SpeechWrap.h" #include <Audio.h> #include "resample.h" #include "Model.h" #include "paraformer_onnx.h" #include "libfunasrapi.h" using namespace paraformer; funasr/runtime/onnxruntime/src/tmp.h
File was deleted funasr/runtime/onnxruntime/tester/CMakeLists.txt
File was deleted