From 7ab2e5cf22bbb31808bcacf84c054c710e4e6a93 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Mon, 24 Apr 2023 16:19:17 +0800
Subject: [PATCH] Merge pull request #400 from alibaba-damo-academy/dev_knf
---
funasr/runtime/onnxruntime/src/ct-transformer.cpp | 188
funasr/runtime/onnxruntime/readme.md | 10
funasr/runtime/onnxruntime/src/model.cpp | 8
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt | 28
funasr/runtime/onnxruntime/src/online-feature.h | 51
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc | 142
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc | 59
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake | 916 ++++
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE | 211 +
funasr/runtime/onnxruntime/include/libfunasrapi.h | 36
funasr/runtime/onnxruntime/src/tokenizer.h | 27
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh | 19
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc | 67
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h | 142
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc | 37
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h | 30
funasr/runtime/onnxruntime/src/precomp.h | 36
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml | 70
funasr/runtime/onnxruntime/src/audio.cpp | 150
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc | 57
funasr/runtime/onnxruntime/src/e2e-vad.h | 797 +++
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt | 31
funasr/runtime/onnxruntime/src/fsmn-vad.h | 60
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py | 64
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py | 119
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h | 27
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc | 58
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py | 48
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md | 5
funasr/runtime/onnxruntime/include/model.h | 19
funasr/runtime/onnxruntime/src/ct-transformer.h | 26
funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h | 134
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc | 52
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake | 17
funasr/runtime/onnxruntime/include/audio.h | 62
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h | 383 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in | 5
funasr/runtime/onnxruntime/CMakeLists.txt | 6
funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp | 140
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc | 49
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc | 257 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h | 30
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc | 73
funasr/runtime/onnxruntime/src/paraformer.h | 58
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt | 115
funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp | 38
funasr/runtime/onnxruntime/src/paraformer.cpp | 262 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc | 67
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc | 48
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in | 21
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h | 178
funasr/runtime/onnxruntime/src/alignedmem.cpp | 4
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c | 2968 ++++++++++++++
funasr/runtime/onnxruntime/src/CMakeLists.txt | 46
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md | 11
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt | 8
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py | 198
funasr/runtime/onnxruntime/src/fsmn-vad.cpp | 273 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md | 106
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc | 247 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h | 56
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt | 2
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py | 107
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h | 30
funasr/runtime/onnxruntime/src/tensor.h | 2
funasr/runtime/onnxruntime/src/predefine-coe.h | 0
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h | 117
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format | 9
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh | 126
funasr/runtime/onnxruntime/include/com-define.h | 48
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py | 120
funasr/runtime/onnxruntime/src/util.h | 26
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc | 68
funasr/runtime/onnxruntime/src/common-struct.h | 0
funasr/runtime/onnxruntime/src/alignedmem.h | 6
funasr/runtime/onnxruntime/src/vocab.h | 25
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake | 57
funasr/runtime/onnxruntime/src/tokenizer.cpp | 208 +
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc | 120
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h | 134
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc | 136
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake | 35
funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h | 142
funasr/runtime/onnxruntime/src/commonfunc.h | 24
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc | 165
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py | 6
funasr/runtime/onnxruntime/src/libfunasrapi.cpp | 192
funasr/runtime/onnxruntime/src/online-feature.cpp | 129
funasr/runtime/onnxruntime/src/util.cpp | 28
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h | 38
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml | 67
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h | 52
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak | 93
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h | 30
funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt | 6
/dev/null | 99
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt | 8
funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml | 97
funasr/runtime/onnxruntime/src/vocab.cpp | 40
99 files changed, 11,571 insertions(+), 471 deletions(-)
diff --git a/funasr/runtime/onnxruntime/CMakeLists.txt b/funasr/runtime/onnxruntime/CMakeLists.txt
index 6feef92..9879c4a 100644
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@@ -25,6 +25,8 @@
link_directories(${ONNXRUNTIME_DIR}/lib)
endif()
-add_subdirectory("./third_party/yaml-cpp")
+include_directories(${PROJECT_SOURCE_DIR}/third_party/kaldi-native-fbank)
+
+add_subdirectory(third_party/yaml-cpp)
+add_subdirectory(third_party/kaldi-native-fbank/kaldi-native-fbank/csrc)
add_subdirectory(src)
-add_subdirectory(tester)
diff --git a/funasr/runtime/onnxruntime/include/Audio.h b/funasr/runtime/onnxruntime/include/Audio.h
deleted file mode 100644
index ec49a9f..0000000
--- a/funasr/runtime/onnxruntime/include/Audio.h
+++ /dev/null
@@ -1,66 +0,0 @@
-
-#ifndef AUDIO_H
-#define AUDIO_H
-
-#include <ComDefine.h>
-#include <queue>
-#include <stdint.h>
-
-#ifndef model_sample_rate
-#define model_sample_rate 16000
-#endif
-#ifndef WAV_HEADER_SIZE
-#define WAV_HEADER_SIZE 44
-#endif
-
-using namespace std;
-
-class AudioFrame {
- private:
- int start;
- int end;
- int len;
-
- public:
- AudioFrame();
- AudioFrame(int len);
-
- ~AudioFrame();
- int set_start(int val);
- int set_end(int val, int max_len);
- int get_start();
- int get_len();
- int disp();
-};
-
-class Audio {
- private:
- float *speech_data;
- int16_t *speech_buff;
- int speech_len;
- int speech_align_len;
- int offset;
- float align_size;
- int data_type;
- queue<AudioFrame *> frame_queue;
-
- public:
- Audio(int data_type);
- Audio(int data_type, int size);
- ~Audio();
- void disp();
- bool loadwav(const char* filename, int32_t* sampling_rate);
- void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
- bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
- bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
- bool loadpcmwav(const char* filename, int32_t* sampling_rate);
- int fetch_chunck(float *&dout, int len);
- int fetch(float *&dout, int &len, int &flag);
- void padding();
- void split();
- float get_time_len();
-
- int get_queue_size() { return (int)frame_queue.size(); }
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/include/ComDefine.h b/funasr/runtime/onnxruntime/include/ComDefine.h
deleted file mode 100644
index f131e5e..0000000
--- a/funasr/runtime/onnxruntime/include/ComDefine.h
+++ /dev/null
@@ -1,11 +0,0 @@
-
-#ifndef COMDEFINE_H
-#define COMDEFINE_H
-
-#define S_BEGIN 0
-#define S_MIDDLE 1
-#define S_END 2
-#define S_ALL 3
-#define S_ERR 4
-
-#endif
diff --git a/funasr/runtime/onnxruntime/include/Model.h b/funasr/runtime/onnxruntime/include/Model.h
deleted file mode 100644
index 6f45c38..0000000
--- a/funasr/runtime/onnxruntime/include/Model.h
+++ /dev/null
@@ -1,17 +0,0 @@
-
-#ifndef MODEL_H
-#define MODEL_H
-
-#include <string>
-
-class Model {
- public:
- virtual ~Model(){};
- virtual void reset() = 0;
- virtual std::string forward_chunk(float *din, int len, int flag) = 0;
- virtual std::string forward(float *din, int len, int flag) = 0;
- virtual std::string rescoring() = 0;
-};
-
-Model *create_model(const char *path,int nThread=0,bool quantize=false);
-#endif
diff --git a/funasr/runtime/onnxruntime/include/audio.h b/funasr/runtime/onnxruntime/include/audio.h
new file mode 100644
index 0000000..ab9f420
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -0,0 +1,62 @@
+
+#ifndef AUDIO_H
+#define AUDIO_H
+
+#include <queue>
+#include <stdint.h>
+#include "model.h"
+
+#ifndef WAV_HEADER_SIZE
+#define WAV_HEADER_SIZE 44
+#endif
+
+using namespace std;
+
+class AudioFrame {
+ private:
+ int start;
+ int end;
+ int len;
+
+ public:
+ AudioFrame();
+ AudioFrame(int len);
+
+ ~AudioFrame();
+ int SetStart(int val);
+ int SetEnd(int val);
+ int GetStart();
+ int GetLen();
+ int Disp();
+};
+
+class Audio {
+ private:
+ float *speech_data;
+ int16_t *speech_buff;
+ int speech_len;
+ int speech_align_len;
+ int offset;
+ float align_size;
+ int data_type;
+ queue<AudioFrame *> frame_queue;
+
+ public:
+ Audio(int data_type);
+ Audio(int data_type, int size);
+ ~Audio();
+ void Disp();
+ bool LoadWav(const char* filename, int32_t* sampling_rate);
+ void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
+ bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
+ bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
+ bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
+ int FetchChunck(float *&dout, int len);
+ int Fetch(float *&dout, int &len, int &flag);
+ void Padding();
+ void Split(Model* recog_obj);
+ float GetTimeLen();
+ int GetQueueSize() { return (int)frame_queue.size(); }
+};
+
+#endif
diff --git a/funasr/runtime/onnxruntime/include/com-define.h b/funasr/runtime/onnxruntime/include/com-define.h
new file mode 100644
index 0000000..e2c22f4
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/com-define.h
@@ -0,0 +1,48 @@
+
+#ifndef COMDEFINE_H
+#define COMDEFINE_H
+
+#define S_BEGIN 0
+#define S_MIDDLE 1
+#define S_END 2
+#define S_ALL 3
+#define S_ERR 4
+
+#ifndef MODEL_SAMPLE_RATE
+#define MODEL_SAMPLE_RATE 16000
+#endif
+
+// vad
+#ifndef VAD_SILENCE_DYRATION
+#define VAD_SILENCE_DYRATION 15000
+#endif
+
+#ifndef VAD_MAX_LEN
+#define VAD_MAX_LEN 800
+#endif
+
+#ifndef VAD_SPEECH_NOISE_THRES
+#define VAD_SPEECH_NOISE_THRES 0.9
+#endif
+
+// punc
+#define PUNC_MODEL_FILE "punc_model.onnx"
+#define PUNC_YAML_FILE "punc.yaml"
+#define UNK_CHAR "<unk>"
+
+#define INPUT_NUM 2
+#define INPUT_NAME1 "input"
+#define INPUT_NAME2 "text_lengths"
+#define OUTPUT_NAME "logits"
+#define TOKEN_LEN 20
+
+#define CANDIDATE_NUM 6
+#define UNKNOW_INDEX 0
+#define NOTPUNC_INDEX 1
+#define COMMA_INDEX 2
+#define PERIOD_INDEX 3
+#define QUESTION_INDEX 4
+#define DUN_INDEX 5
+#define CACHE_POP_TRIGGER_LIMIT 200
+
+#endif
diff --git a/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h
new file mode 100644
index 0000000..0786aad
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h
@@ -0,0 +1,134 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+struct FbankOptions {
+ FrameExtractionOptions frame_opts;
+ MelBanksOptions mel_opts;
+ // append an extra dimension with energy to the filter banks
+ bool use_energy = false;
+ float energy_floor = 0.0f; // active iff use_energy==true
+
+ // If true, compute log_energy before preemphasis and windowing
+ // If false, compute log_energy after preemphasis and windowing
+ bool raw_energy = true; // active iff use_energy==true
+
+ // If true, put energy last (if using energy)
+ // If false, put energy first
+ bool htk_compat = false; // active iff use_energy==true
+
+ // if true (default), produce log-filterbank, else linear
+ bool use_log_fbank = true;
+
+ // if true (default), use power in filterbank
+ // analysis, else magnitude.
+ bool use_power = true;
+
+ FbankOptions() { mel_opts.num_bins = 23; }
+
+ std::string ToString() const {
+ std::ostringstream os;
+ os << "frame_opts: \n";
+ os << frame_opts << "\n";
+ os << "\n";
+
+ os << "mel_opts: \n";
+ os << mel_opts << "\n";
+
+ os << "use_energy: " << use_energy << "\n";
+ os << "energy_floor: " << energy_floor << "\n";
+ os << "raw_energy: " << raw_energy << "\n";
+ os << "htk_compat: " << htk_compat << "\n";
+ os << "use_log_fbank: " << use_log_fbank << "\n";
+ os << "use_power: " << use_power << "\n";
+ return os.str();
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
+
+class FbankComputer {
+ public:
+ using Options = FbankOptions;
+
+ explicit FbankComputer(const FbankOptions &opts);
+ ~FbankComputer();
+
+ int32_t Dim() const {
+ return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
+ }
+
+ // if true, compute log_energy_pre_window but after dithering and dc removal
+ bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
+
+ const FrameExtractionOptions &GetFrameOptions() const {
+ return opts_.frame_opts;
+ }
+
+ const FbankOptions &GetOptions() const { return opts_; }
+
+ /**
+ Function that computes one frame of features from
+ one frame of signal.
+
+ @param [in] signal_raw_log_energy The log-energy of the frame of the signal
+ prior to windowing and pre-emphasis, or
+ log(numeric_limits<float>::min()), whichever is greater. Must be
+ ignored by this function if this class returns false from
+ this->NeedRawLogEnergy().
+ @param [in] vtln_warp The VTLN warping factor that the user wants
+ to be applied when computing features for this utterance. Will
+ normally be 1.0, meaning no warping is to be done. The value will
+ be ignored for feature types that don't support VTLN, such as
+ spectrogram features.
+ @param [in] signal_frame One frame of the signal,
+ as extracted using the function ExtractWindow() using the options
+ returned by this->GetFrameOptions(). The function will use the
+ vector as a workspace, which is why it's a non-const pointer.
+ @param [out] feature Pointer to a vector of size this->Dim(), to which
+ the computed feature will be written. It should be pre-allocated.
+ */
+ void Compute(float signal_raw_log_energy, float vtln_warp,
+ std::vector<float> *signal_frame, float *feature);
+
+ private:
+ const MelBanks *GetMelBanks(float vtln_warp);
+
+ FbankOptions opts_;
+ float log_energy_floor_;
+ std::map<float, MelBanks *> mel_banks_; // float is VTLN coefficient.
+ Rfft rfft_;
+};
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h
new file mode 100644
index 0000000..5ca5511
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+
+#include <cstdint>
+#include <deque>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+namespace knf {
+
+/// This class serves as a storage for feature vectors with an option to limit
+/// the memory usage by removing old elements. The deleted frames indices are
+/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
+/// provides the indices as if no deletion was being performed.
+/// This is useful when processing very long recordings which would otherwise
+/// cause the memory to eventually blow up when the features are not being
+/// removed.
+class RecyclingVector {
+ public:
+ /// By default it does not remove any elements.
+ explicit RecyclingVector(int32_t items_to_hold = -1);
+
+ ~RecyclingVector() = default;
+ RecyclingVector(const RecyclingVector &) = delete;
+ RecyclingVector &operator=(const RecyclingVector &) = delete;
+
+ // The pointer is owned by RecyclingVector
+ // Users should not free it
+ const float *At(int32_t index) const;
+
+ void PushBack(std::vector<float> item);
+
+ /// This method returns the size as if no "recycling" had happened,
+ /// i.e. equivalent to the number of times the PushBack method has been
+ /// called.
+ int32_t Size() const;
+
+ private:
+ std::deque<std::vector<float>> items_;
+ int32_t items_to_hold_;
+ int32_t first_available_index_;
+};
+
+/// This is a templated class for online feature extraction;
+/// it's templated on a class like MfccComputer or PlpComputer
+/// that does the basic feature extraction.
+template <class C>
+class OnlineGenericBaseFeature {
+ public:
+ // Constructor from options class
+ explicit OnlineGenericBaseFeature(const typename C::Options &opts);
+
+ int32_t Dim() const { return computer_.Dim(); }
+
+ float FrameShiftInSeconds() const {
+ return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
+ }
+
+ int32_t NumFramesReady() const { return features_.Size(); }
+
+ // Note: IsLastFrame() will only ever return true if you have called
+ // InputFinished() (and this frame is the last frame).
+ bool IsLastFrame(int32_t frame) const {
+ return input_finished_ && frame == NumFramesReady() - 1;
+ }
+
+ const float *GetFrame(int32_t frame) const { return features_.At(frame); }
+
+ // This would be called from the application, when you get
+ // more wave data. Note: the sampling_rate is only provided so
+ // the code can assert that it matches the sampling rate
+ // expected in the options.
+ //
+ // @param sampling_rate The sampling_rate of the input waveform
+ // @param waveform Pointer to a 1-D array of size n
+ // @param n Number of entries in waveform
+ void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+
+ // InputFinished() tells the class you won't be providing any
+ // more waveform. This will help flush out the last frame or two
+ // of features, in the case where snip-edges == false; it also
+ // affects the return value of IsLastFrame().
+ void InputFinished();
+
+ private:
+ // This function computes any additional feature frames that it is possible to
+ // compute from 'waveform_remainder_', which at this point may contain more
+ // than just a remainder-sized quantity (because AcceptWaveform() appends to
+ // waveform_remainder_ before calling this function). It adds these feature
+ // frames to features_, and shifts off any now-unneeded samples of input from
+ // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
+ void ComputeFeatures();
+
+ C computer_; // class that does the MFCC or PLP or filterbank computation
+
+ FeatureWindowFunction window_function_;
+
+ // features_ is the Mfcc or Plp or Fbank features that we have already
+ // computed.
+
+ RecyclingVector features_;
+
+ // True if the user has called "InputFinished()"
+ bool input_finished_;
+
+ // waveform_offset_ is the number of samples of waveform that we have
+ // already discarded, i.e. that were prior to 'waveform_remainder_'.
+ int64_t waveform_offset_;
+
+ // waveform_remainder_ is a short piece of waveform that we may need to keep
+ // after extracting all the whole frames we can (whatever length of feature
+ // will be required for the next phase of computation).
+ // It is a 1-D tensor
+ std::vector<float> waveform_remainder_;
+};
+
+using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/include/libfunasrapi.h b/funasr/runtime/onnxruntime/include/libfunasrapi.h
index 9bc37e7..6b6e148 100644
--- a/funasr/runtime/onnxruntime/include/libfunasrapi.h
+++ b/funasr/runtime/onnxruntime/include/libfunasrapi.h
@@ -35,7 +35,6 @@
RASRM_CTC_GREEDY_SEARCH=0,
RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
RASRM_ATTENSION_RESCORING = 2,
-
}FUNASR_MODE;
typedef enum {
@@ -43,33 +42,24 @@
FUNASR_MODEL_PADDLE_2 = 1,
FUNASR_MODEL_K2 = 2,
FUNASR_MODEL_PARAFORMER = 3,
-
}FUNASR_MODEL_TYPE;
-typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
+typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step.
-// APIs for qmasr
-_FUNASRAPI FUNASR_HANDLE FunASRInit(const char* szModelDir, int nThread, bool quantize);
+// APIs for funasr
+_FUNASRAPI FUNASR_HANDLE FunASRInit(const char* sz_model_dir, int thread_num, bool quantize=false, bool use_vad=false, bool use_punc=false);
+// If no fn_callback is provided, pass NULL.
+_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
-// if not give a fnCallback ,it should be NULL
-_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT Result,int nIndex);
-
-_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT Result);
-
-_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT Result);
-
-_FUNASRAPI void FunASRUninit(FUNASR_HANDLE Handle);
-
-_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT Result);
+_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index);
+_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result);
+_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result);
+_FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle);
+_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result);
#ifdef __cplusplus
diff --git a/funasr/runtime/onnxruntime/include/model.h b/funasr/runtime/onnxruntime/include/model.h
new file mode 100644
index 0000000..26a67f0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/model.h
@@ -0,0 +1,19 @@
+#ifndef MODEL_H
+#define MODEL_H
+
+#include <string>
+#include <vector>  // std::vector appears in the VadSeg() signature below
+
+class Model {  // abstract interface: every operation below is pure virtual
+ public:
+ virtual ~Model(){};
+ virtual void Reset() = 0;
+ virtual std::string ForwardChunk(float *din, int len, int flag) = 0;
+ virtual std::string Forward(float *din, int len, int flag) = 0;
+ virtual std::string Rescoring() = 0;
+ virtual std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data)=0;
+ virtual std::string AddPunc(const char* sz_input)=0;
+};
+
+Model *CreateModel(const char *path,int thread_num=1,bool quantize=false, bool use_vad=false, bool use_punc=false);
+#endif
diff --git a/funasr/runtime/onnxruntime/readme.md b/funasr/runtime/onnxruntime/readme.md
index f7be2e0..6886d58 100644
--- a/funasr/runtime/onnxruntime/readme.md
+++ b/funasr/runtime/onnxruntime/readme.md
@@ -25,12 +25,6 @@
tar -zxvf onnxruntime-linux-x64-1.14.0.tgz
```
-### Install fftw3
-```shell
-sudo apt install libfftw3-dev #ubuntu
-# sudo yum install fftw fftw-devel #centos
-```
-
### Install openblas
```shell
sudo apt-get install libopenblas-dev #ubuntu
@@ -65,12 +59,12 @@
## Run the demo
```shell
-tester /path/models_dir /path/wave_file quantize(true or false)
+funasr-onnx-offline /path/models_dir /path/wave_file quantize(true or false) use_vad(true or false) use_punc(true or false)
```
The structure of /path/models_dir
```
-config.yaml, am.mvn, model.onnx(or model_quant.onnx)
+config.yaml, am.mvn, model.onnx(or model_quant.onnx), (vad_model.onnx, vad.mvn if you use vad), (punc_model.onnx, punc.yaml if you use punc)
```
diff --git a/funasr/runtime/onnxruntime/src/CMakeLists.txt b/funasr/runtime/onnxruntime/src/CMakeLists.txt
index d41fcd0..e00edc1 100644
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -1,44 +1,32 @@
file(GLOB files1 "*.cpp")
file(GLOB files2 "*.cc")
-file(GLOB files4 "paraformer/*.cpp")
-set(files ${files1} ${files2} ${files3} ${files4})
-
-# message("${files}")
+set(files ${files1} ${files2})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
add_library(funasr ${files})
if(WIN32)
-
- set(EXTRA_LIBS libfftw3f-3 yaml-cpp)
- if(CMAKE_CL_64)
- target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
- else()
- target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
- endif()
- target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
-
- target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
+ set(EXTRA_LIBS pthread yaml-cpp csrc)
+ if(CMAKE_CL_64)
+ target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
+ else()
+ target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
+ endif()
+ target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
+
+ target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
else()
-
- set(EXTRA_LIBS fftw3f pthread yaml-cpp)
- target_include_directories(funasr PUBLIC "/usr/local/opt/fftw/include")
- target_link_directories(funasr PUBLIC "/usr/local/opt/fftw/lib")
-
- target_include_directories(funasr PUBLIC "/usr/local/opt/openblas/include")
- target_link_directories(funasr PUBLIC "/usr/local/opt/openblas/lib")
-
- target_include_directories(funasr PUBLIC "/usr/include")
- target_link_directories(funasr PUBLIC "/usr/lib64")
-
- target_include_directories(funasr PUBLIC ${FFTW3F_INCLUDE_DIR})
- target_link_directories(funasr PUBLIC ${FFTW3F_LIBRARY_DIR})
- include_directories(${ONNXRUNTIME_DIR}/include)
+ set(EXTRA_LIBS pthread yaml-cpp csrc)
+ include_directories(${ONNXRUNTIME_DIR}/include)
endif()
include_directories(${CMAKE_SOURCE_DIR}/include)
target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})
-
+add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp")
+add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp")
+target_link_libraries(funasr-onnx-offline PUBLIC funasr)
+target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr)
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp b/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
deleted file mode 100644
index 6d2826a..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-
-#include "precomp.h"
-
-using namespace std;
-
-FeatureExtract::FeatureExtract(int mode) : mode(mode)
-{
-}
-
-FeatureExtract::~FeatureExtract()
-{
-}
-
-void FeatureExtract::reset()
-{
- speech.reset();
- fqueue.reset();
-}
-
-int FeatureExtract::size()
-{
- return fqueue.size();
-}
-
-void FeatureExtract::insert(fftwf_plan plan, float *din, int len, int flag)
-{
- float* fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
- fftwf_complex* fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
- memset(fft_input, 0, sizeof(float) * fft_size);
-
- const float *window = (const float *)&window_hex;
- if (mode == 3)
- window = (const float *)&window_hamm_hex;
-
- speech.load(din, len);
- int i, j;
- float tmp_feature[80];
- if (mode == 0 || mode == 2 || mode == 3) {
- int ll = (speech.size() - window_size) / window_shift + 1;
- fqueue.reinit(ll);
- }
-
- for (i = 0; i <= speech.size() - window_size; i = i + window_shift) {
- float tmp_mean = 0;
- for (j = 0; j < window_size; j++) {
- tmp_mean += speech[i + j];
- }
-
- tmp_mean = tmp_mean / window_size;
-
- float pre_val = (float)speech[i] - tmp_mean;
-
- for (j = 0; j < window_size; j++) {
- float win = window[j];
- float cur_val = (float)speech[i + j] - tmp_mean;
- fft_input[j] = win * (cur_val - 0.97 * pre_val);
- pre_val = cur_val;
- }
-
- fftwf_execute_dft_r2c(plan, fft_input, fft_out);
-
- melspect((float *)fft_out, tmp_feature);
- int tmp_flag = S_MIDDLE;
- if (flag == S_END && i > speech.size() - 560)
- tmp_flag = S_END;
-
- fqueue.push(tmp_feature, tmp_flag);
- }
- speech.update(i);
- fftwf_free(fft_input);
- fftwf_free(fft_out);
-}
-
-bool FeatureExtract::fetch(Tensor<float> *&dout)
-{
- if (fqueue.size() < 1) {
- return false;
- } else {
- dout = fqueue.pop();
- return true;
- }
-}
-
-void FeatureExtract::global_cmvn(float *din)
-{
- const float *std;
- const float *mean;
-
- if (mode < 2) {
- if (mode == 0) {
- std = (const float *)global_cmvn_std_hex;
- mean = (const float *)global_cmvn_mean_hex;
- } else {
- std = (const float *)global_cmvn_std_online_hex;
- mean = (const float *)global_cmvn_mean_online_hex;
- }
-
- int i;
- for (i = 0; i < 80; i++) {
- float tmp = din[i] < 1e-7 ? 1e-7 : din[i];
- tmp = log(tmp);
- din[i] = (tmp - mean[i]) / std[i];
- }
- } else {
- int i;
-
- int val = 0x34000000;
- float min_resol = *((float *)&val);
-
- for (i = 0; i < 80; i++) {
- float tmp = din[i] < min_resol ? min_resol : din[i];
- din[i] = log(tmp);
- }
- }
-}
-
-void FeatureExtract::melspect(float *din, float *dout)
-{
- float fftmag[256];
- const float *melcoe = (const float *)melcoe_hex;
- int i;
- for (i = 0; i < 256; i++) {
- float real = din[2 * i];
- float imag = din[2 * i + 1];
- fftmag[i] = real * real + imag * imag;
- }
- dout[0] = melcoe[0] * fftmag[1] + melcoe[1] * fftmag[2];
- dout[1] = melcoe[2] * fftmag[2];
- dout[2] = melcoe[3] * fftmag[3];
- dout[3] = melcoe[4] * fftmag[3] + melcoe[5] * fftmag[4];
- dout[4] = melcoe[6] * fftmag[4] + melcoe[7] * fftmag[5];
- dout[5] = melcoe[8] * fftmag[5] + melcoe[9] * fftmag[6];
- dout[6] = melcoe[10] * fftmag[6] + melcoe[11] * fftmag[7];
- dout[7] = melcoe[12] * fftmag[7];
- dout[8] = melcoe[13] * fftmag[8];
- dout[9] = melcoe[14] * fftmag[8] + melcoe[15] * fftmag[9];
- dout[10] = melcoe[16] * fftmag[9] + melcoe[17] * fftmag[10];
- dout[11] = melcoe[18] * fftmag[10] + melcoe[19] * fftmag[11];
- dout[12] = melcoe[20] * fftmag[11] + melcoe[21] * fftmag[12] +
- melcoe[22] * fftmag[13];
- dout[13] = melcoe[23] * fftmag[12] + melcoe[24] * fftmag[13] +
- melcoe[25] * fftmag[14];
- dout[14] = melcoe[26] * fftmag[14] + melcoe[27] * fftmag[15];
- dout[15] = melcoe[28] * fftmag[15] + melcoe[29] * fftmag[16];
- dout[16] = melcoe[30] * fftmag[16] + melcoe[31] * fftmag[17];
- dout[17] = melcoe[32] * fftmag[17] + melcoe[33] * fftmag[18];
- dout[18] = melcoe[34] * fftmag[18] + melcoe[35] * fftmag[19] +
- melcoe[36] * fftmag[20];
- dout[19] = melcoe[37] * fftmag[19] + melcoe[38] * fftmag[20] +
- melcoe[39] * fftmag[21];
- dout[20] = melcoe[40] * fftmag[21] + melcoe[41] * fftmag[22];
- dout[21] = melcoe[42] * fftmag[22] + melcoe[43] * fftmag[23] +
- melcoe[44] * fftmag[24];
- dout[22] = melcoe[45] * fftmag[23] + melcoe[46] * fftmag[24] +
- melcoe[47] * fftmag[25];
- dout[23] = melcoe[48] * fftmag[25] + melcoe[49] * fftmag[26] +
- melcoe[50] * fftmag[27];
- dout[24] = melcoe[51] * fftmag[26] + melcoe[52] * fftmag[27] +
- melcoe[53] * fftmag[28];
- dout[25] = melcoe[54] * fftmag[28] + melcoe[55] * fftmag[29] +
- melcoe[56] * fftmag[30];
- dout[26] = melcoe[57] * fftmag[29] + melcoe[58] * fftmag[30] +
- melcoe[59] * fftmag[31] + melcoe[60] * fftmag[32];
- dout[27] = melcoe[61] * fftmag[31] + melcoe[62] * fftmag[32] +
- melcoe[63] * fftmag[33];
- dout[28] = melcoe[64] * fftmag[33] + melcoe[65] * fftmag[34] +
- melcoe[66] * fftmag[35];
- dout[29] = melcoe[67] * fftmag[34] + melcoe[68] * fftmag[35] +
- melcoe[69] * fftmag[36] + melcoe[70] * fftmag[37];
- dout[30] = melcoe[71] * fftmag[36] + melcoe[72] * fftmag[37] +
- melcoe[73] * fftmag[38] + melcoe[74] * fftmag[39];
- dout[31] = melcoe[75] * fftmag[38] + melcoe[76] * fftmag[39] +
- melcoe[77] * fftmag[40] + melcoe[78] * fftmag[41];
- dout[32] = melcoe[79] * fftmag[40] + melcoe[80] * fftmag[41] +
- melcoe[81] * fftmag[42] + melcoe[82] * fftmag[43];
- dout[33] = melcoe[83] * fftmag[42] + melcoe[84] * fftmag[43] +
- melcoe[85] * fftmag[44] + melcoe[86] * fftmag[45];
- dout[34] = melcoe[87] * fftmag[44] + melcoe[88] * fftmag[45] +
- melcoe[89] * fftmag[46] + melcoe[90] * fftmag[47];
- dout[35] = melcoe[91] * fftmag[46] + melcoe[92] * fftmag[47] +
- melcoe[93] * fftmag[48] + melcoe[94] * fftmag[49];
- dout[36] = melcoe[95] * fftmag[48] + melcoe[96] * fftmag[49] +
- melcoe[97] * fftmag[50] + melcoe[98] * fftmag[51];
- dout[37] = melcoe[99] * fftmag[50] + melcoe[100] * fftmag[51] +
- melcoe[101] * fftmag[52] + melcoe[102] * fftmag[53] +
- melcoe[103] * fftmag[54];
- dout[38] = melcoe[104] * fftmag[52] + melcoe[105] * fftmag[53] +
- melcoe[106] * fftmag[54] + melcoe[107] * fftmag[55] +
- melcoe[108] * fftmag[56];
- dout[39] = melcoe[109] * fftmag[55] + melcoe[110] * fftmag[56] +
- melcoe[111] * fftmag[57] + melcoe[112] * fftmag[58];
- dout[40] = melcoe[113] * fftmag[57] + melcoe[114] * fftmag[58] +
- melcoe[115] * fftmag[59] + melcoe[116] * fftmag[60] +
- melcoe[117] * fftmag[61];
- dout[41] = melcoe[118] * fftmag[59] + melcoe[119] * fftmag[60] +
- melcoe[120] * fftmag[61] + melcoe[121] * fftmag[62] +
- melcoe[122] * fftmag[63] + melcoe[123] * fftmag[64];
- dout[42] = melcoe[124] * fftmag[62] + melcoe[125] * fftmag[63] +
- melcoe[126] * fftmag[64] + melcoe[127] * fftmag[65] +
- melcoe[128] * fftmag[66];
- dout[43] = melcoe[129] * fftmag[65] + melcoe[130] * fftmag[66] +
- melcoe[131] * fftmag[67] + melcoe[132] * fftmag[68] +
- melcoe[133] * fftmag[69];
- dout[44] = melcoe[134] * fftmag[67] + melcoe[135] * fftmag[68] +
- melcoe[136] * fftmag[69] + melcoe[137] * fftmag[70] +
- melcoe[138] * fftmag[71] + melcoe[139] * fftmag[72];
- dout[45] = melcoe[140] * fftmag[70] + melcoe[141] * fftmag[71] +
- melcoe[142] * fftmag[72] + melcoe[143] * fftmag[73] +
- melcoe[144] * fftmag[74] + melcoe[145] * fftmag[75];
- dout[46] = melcoe[146] * fftmag[73] + melcoe[147] * fftmag[74] +
- melcoe[148] * fftmag[75] + melcoe[149] * fftmag[76] +
- melcoe[150] * fftmag[77] + melcoe[151] * fftmag[78];
- dout[47] = melcoe[152] * fftmag[76] + melcoe[153] * fftmag[77] +
- melcoe[154] * fftmag[78] + melcoe[155] * fftmag[79] +
- melcoe[156] * fftmag[80] + melcoe[157] * fftmag[81];
- dout[48] = melcoe[158] * fftmag[79] + melcoe[159] * fftmag[80] +
- melcoe[160] * fftmag[81] + melcoe[161] * fftmag[82] +
- melcoe[162] * fftmag[83] + melcoe[163] * fftmag[84];
- dout[49] = melcoe[164] * fftmag[82] + melcoe[165] * fftmag[83] +
- melcoe[166] * fftmag[84] + melcoe[167] * fftmag[85] +
- melcoe[168] * fftmag[86] + melcoe[169] * fftmag[87] +
- melcoe[170] * fftmag[88];
- dout[50] = melcoe[171] * fftmag[85] + melcoe[172] * fftmag[86] +
- melcoe[173] * fftmag[87] + melcoe[174] * fftmag[88] +
- melcoe[175] * fftmag[89] + melcoe[176] * fftmag[90] +
- melcoe[177] * fftmag[91];
- dout[51] = melcoe[178] * fftmag[89] + melcoe[179] * fftmag[90] +
- melcoe[180] * fftmag[91] + melcoe[181] * fftmag[92] +
- melcoe[182] * fftmag[93] + melcoe[183] * fftmag[94] +
- melcoe[184] * fftmag[95];
- dout[52] = melcoe[185] * fftmag[92] + melcoe[186] * fftmag[93] +
- melcoe[187] * fftmag[94] + melcoe[188] * fftmag[95] +
- melcoe[189] * fftmag[96] + melcoe[190] * fftmag[97] +
- melcoe[191] * fftmag[98];
- dout[53] = melcoe[192] * fftmag[96] + melcoe[193] * fftmag[97] +
- melcoe[194] * fftmag[98] + melcoe[195] * fftmag[99] +
- melcoe[196] * fftmag[100] + melcoe[197] * fftmag[101] +
- melcoe[198] * fftmag[102];
- dout[54] = melcoe[199] * fftmag[99] + melcoe[200] * fftmag[100] +
- melcoe[201] * fftmag[101] + melcoe[202] * fftmag[102] +
- melcoe[203] * fftmag[103] + melcoe[204] * fftmag[104] +
- melcoe[205] * fftmag[105] + melcoe[206] * fftmag[106];
- dout[55] = melcoe[207] * fftmag[103] + melcoe[208] * fftmag[104] +
- melcoe[209] * fftmag[105] + melcoe[210] * fftmag[106] +
- melcoe[211] * fftmag[107] + melcoe[212] * fftmag[108] +
- melcoe[213] * fftmag[109] + melcoe[214] * fftmag[110];
- dout[56] = melcoe[215] * fftmag[107] + melcoe[216] * fftmag[108] +
- melcoe[217] * fftmag[109] + melcoe[218] * fftmag[110] +
- melcoe[219] * fftmag[111] + melcoe[220] * fftmag[112] +
- melcoe[221] * fftmag[113] + melcoe[222] * fftmag[114];
- dout[57] = melcoe[223] * fftmag[111] + melcoe[224] * fftmag[112] +
- melcoe[225] * fftmag[113] + melcoe[226] * fftmag[114] +
- melcoe[227] * fftmag[115] + melcoe[228] * fftmag[116] +
- melcoe[229] * fftmag[117] + melcoe[230] * fftmag[118] +
- melcoe[231] * fftmag[119];
- dout[58] = melcoe[232] * fftmag[115] + melcoe[233] * fftmag[116] +
- melcoe[234] * fftmag[117] + melcoe[235] * fftmag[118] +
- melcoe[236] * fftmag[119] + melcoe[237] * fftmag[120] +
- melcoe[238] * fftmag[121] + melcoe[239] * fftmag[122] +
- melcoe[240] * fftmag[123];
- dout[59] = melcoe[241] * fftmag[120] + melcoe[242] * fftmag[121] +
- melcoe[243] * fftmag[122] + melcoe[244] * fftmag[123] +
- melcoe[245] * fftmag[124] + melcoe[246] * fftmag[125] +
- melcoe[247] * fftmag[126] + melcoe[248] * fftmag[127] +
- melcoe[249] * fftmag[128];
- dout[60] = melcoe[250] * fftmag[124] + melcoe[251] * fftmag[125] +
- melcoe[252] * fftmag[126] + melcoe[253] * fftmag[127] +
- melcoe[254] * fftmag[128] + melcoe[255] * fftmag[129] +
- melcoe[256] * fftmag[130] + melcoe[257] * fftmag[131] +
- melcoe[258] * fftmag[132];
- dout[61] = melcoe[259] * fftmag[129] + melcoe[260] * fftmag[130] +
- melcoe[261] * fftmag[131] + melcoe[262] * fftmag[132] +
- melcoe[263] * fftmag[133] + melcoe[264] * fftmag[134] +
- melcoe[265] * fftmag[135] + melcoe[266] * fftmag[136] +
- melcoe[267] * fftmag[137];
- dout[62] = melcoe[268] * fftmag[133] + melcoe[269] * fftmag[134] +
- melcoe[270] * fftmag[135] + melcoe[271] * fftmag[136] +
- melcoe[272] * fftmag[137] + melcoe[273] * fftmag[138] +
- melcoe[274] * fftmag[139] + melcoe[275] * fftmag[140] +
- melcoe[276] * fftmag[141] + melcoe[277] * fftmag[142];
- dout[63] = melcoe[278] * fftmag[138] + melcoe[279] * fftmag[139] +
- melcoe[280] * fftmag[140] + melcoe[281] * fftmag[141] +
- melcoe[282] * fftmag[142] + melcoe[283] * fftmag[143] +
- melcoe[284] * fftmag[144] + melcoe[285] * fftmag[145] +
- melcoe[286] * fftmag[146] + melcoe[287] * fftmag[147];
- dout[64] = melcoe[288] * fftmag[143] + melcoe[289] * fftmag[144] +
- melcoe[290] * fftmag[145] + melcoe[291] * fftmag[146] +
- melcoe[292] * fftmag[147] + melcoe[293] * fftmag[148] +
- melcoe[294] * fftmag[149] + melcoe[295] * fftmag[150] +
- melcoe[296] * fftmag[151] + melcoe[297] * fftmag[152] +
- melcoe[298] * fftmag[153];
- dout[65] = melcoe[299] * fftmag[148] + melcoe[300] * fftmag[149] +
- melcoe[301] * fftmag[150] + melcoe[302] * fftmag[151] +
- melcoe[303] * fftmag[152] + melcoe[304] * fftmag[153] +
- melcoe[305] * fftmag[154] + melcoe[306] * fftmag[155] +
- melcoe[307] * fftmag[156] + melcoe[308] * fftmag[157] +
- melcoe[309] * fftmag[158];
- dout[66] = melcoe[310] * fftmag[154] + melcoe[311] * fftmag[155] +
- melcoe[312] * fftmag[156] + melcoe[313] * fftmag[157] +
- melcoe[314] * fftmag[158] + melcoe[315] * fftmag[159] +
- melcoe[316] * fftmag[160] + melcoe[317] * fftmag[161] +
- melcoe[318] * fftmag[162] + melcoe[319] * fftmag[163] +
- melcoe[320] * fftmag[164];
- dout[67] = melcoe[321] * fftmag[159] + melcoe[322] * fftmag[160] +
- melcoe[323] * fftmag[161] + melcoe[324] * fftmag[162] +
- melcoe[325] * fftmag[163] + melcoe[326] * fftmag[164] +
- melcoe[327] * fftmag[165] + melcoe[328] * fftmag[166] +
- melcoe[329] * fftmag[167] + melcoe[330] * fftmag[168] +
- melcoe[331] * fftmag[169] + melcoe[332] * fftmag[170];
- dout[68] = melcoe[333] * fftmag[165] + melcoe[334] * fftmag[166] +
- melcoe[335] * fftmag[167] + melcoe[336] * fftmag[168] +
- melcoe[337] * fftmag[169] + melcoe[338] * fftmag[170] +
- melcoe[339] * fftmag[171] + melcoe[340] * fftmag[172] +
- melcoe[341] * fftmag[173] + melcoe[342] * fftmag[174] +
- melcoe[343] * fftmag[175] + melcoe[344] * fftmag[176];
- dout[69] = melcoe[345] * fftmag[171] + melcoe[346] * fftmag[172] +
- melcoe[347] * fftmag[173] + melcoe[348] * fftmag[174] +
- melcoe[349] * fftmag[175] + melcoe[350] * fftmag[176] +
- melcoe[351] * fftmag[177] + melcoe[352] * fftmag[178] +
- melcoe[353] * fftmag[179] + melcoe[354] * fftmag[180] +
- melcoe[355] * fftmag[181] + melcoe[356] * fftmag[182];
- dout[70] = melcoe[357] * fftmag[177] + melcoe[358] * fftmag[178] +
- melcoe[359] * fftmag[179] + melcoe[360] * fftmag[180] +
- melcoe[361] * fftmag[181] + melcoe[362] * fftmag[182] +
- melcoe[363] * fftmag[183] + melcoe[364] * fftmag[184] +
- melcoe[365] * fftmag[185] + melcoe[366] * fftmag[186] +
- melcoe[367] * fftmag[187] + melcoe[368] * fftmag[188];
- dout[71] = melcoe[369] * fftmag[183] + melcoe[370] * fftmag[184] +
- melcoe[371] * fftmag[185] + melcoe[372] * fftmag[186] +
- melcoe[373] * fftmag[187] + melcoe[374] * fftmag[188] +
- melcoe[375] * fftmag[189] + melcoe[376] * fftmag[190] +
- melcoe[377] * fftmag[191] + melcoe[378] * fftmag[192] +
- melcoe[379] * fftmag[193] + melcoe[380] * fftmag[194] +
- melcoe[381] * fftmag[195];
- dout[72] = melcoe[382] * fftmag[189] + melcoe[383] * fftmag[190] +
- melcoe[384] * fftmag[191] + melcoe[385] * fftmag[192] +
- melcoe[386] * fftmag[193] + melcoe[387] * fftmag[194] +
- melcoe[388] * fftmag[195] + melcoe[389] * fftmag[196] +
- melcoe[390] * fftmag[197] + melcoe[391] * fftmag[198] +
- melcoe[392] * fftmag[199] + melcoe[393] * fftmag[200] +
- melcoe[394] * fftmag[201] + melcoe[395] * fftmag[202];
- dout[73] = melcoe[396] * fftmag[196] + melcoe[397] * fftmag[197] +
- melcoe[398] * fftmag[198] + melcoe[399] * fftmag[199] +
- melcoe[400] * fftmag[200] + melcoe[401] * fftmag[201] +
- melcoe[402] * fftmag[202] + melcoe[403] * fftmag[203] +
- melcoe[404] * fftmag[204] + melcoe[405] * fftmag[205] +
- melcoe[406] * fftmag[206] + melcoe[407] * fftmag[207] +
- melcoe[408] * fftmag[208] + melcoe[409] * fftmag[209];
- dout[74] = melcoe[410] * fftmag[203] + melcoe[411] * fftmag[204] +
- melcoe[412] * fftmag[205] + melcoe[413] * fftmag[206] +
- melcoe[414] * fftmag[207] + melcoe[415] * fftmag[208] +
- melcoe[416] * fftmag[209] + melcoe[417] * fftmag[210] +
- melcoe[418] * fftmag[211] + melcoe[419] * fftmag[212] +
- melcoe[420] * fftmag[213] + melcoe[421] * fftmag[214] +
- melcoe[422] * fftmag[215] + melcoe[423] * fftmag[216];
- dout[75] = melcoe[424] * fftmag[210] + melcoe[425] * fftmag[211] +
- melcoe[426] * fftmag[212] + melcoe[427] * fftmag[213] +
- melcoe[428] * fftmag[214] + melcoe[429] * fftmag[215] +
- melcoe[430] * fftmag[216] + melcoe[431] * fftmag[217] +
- melcoe[432] * fftmag[218] + melcoe[433] * fftmag[219] +
- melcoe[434] * fftmag[220] + melcoe[435] * fftmag[221] +
- melcoe[436] * fftmag[222] + melcoe[437] * fftmag[223];
- dout[76] = melcoe[438] * fftmag[217] + melcoe[439] * fftmag[218] +
- melcoe[440] * fftmag[219] + melcoe[441] * fftmag[220] +
- melcoe[442] * fftmag[221] + melcoe[443] * fftmag[222] +
- melcoe[444] * fftmag[223] + melcoe[445] * fftmag[224] +
- melcoe[446] * fftmag[225] + melcoe[447] * fftmag[226] +
- melcoe[448] * fftmag[227] + melcoe[449] * fftmag[228] +
- melcoe[450] * fftmag[229] + melcoe[451] * fftmag[230] +
- melcoe[452] * fftmag[231];
- dout[77] = melcoe[453] * fftmag[224] + melcoe[454] * fftmag[225] +
- melcoe[455] * fftmag[226] + melcoe[456] * fftmag[227] +
- melcoe[457] * fftmag[228] + melcoe[458] * fftmag[229] +
- melcoe[459] * fftmag[230] + melcoe[460] * fftmag[231] +
- melcoe[461] * fftmag[232] + melcoe[462] * fftmag[233] +
- melcoe[463] * fftmag[234] + melcoe[464] * fftmag[235] +
- melcoe[465] * fftmag[236] + melcoe[466] * fftmag[237] +
- melcoe[467] * fftmag[238] + melcoe[468] * fftmag[239];
- dout[78] = melcoe[469] * fftmag[232] + melcoe[470] * fftmag[233] +
- melcoe[471] * fftmag[234] + melcoe[472] * fftmag[235] +
- melcoe[473] * fftmag[236] + melcoe[474] * fftmag[237] +
- melcoe[475] * fftmag[238] + melcoe[476] * fftmag[239] +
- melcoe[477] * fftmag[240] + melcoe[478] * fftmag[241] +
- melcoe[479] * fftmag[242] + melcoe[480] * fftmag[243] +
- melcoe[481] * fftmag[244] + melcoe[482] * fftmag[245] +
- melcoe[483] * fftmag[246] + melcoe[484] * fftmag[247];
- dout[79] = melcoe[485] * fftmag[240] + melcoe[486] * fftmag[241] +
- melcoe[487] * fftmag[242] + melcoe[488] * fftmag[243] +
- melcoe[489] * fftmag[244] + melcoe[490] * fftmag[245] +
- melcoe[491] * fftmag[246] + melcoe[492] * fftmag[247] +
- melcoe[493] * fftmag[248] + melcoe[494] * fftmag[249] +
- melcoe[495] * fftmag[250] + melcoe[496] * fftmag[251] +
- melcoe[497] * fftmag[252] + melcoe[498] * fftmag[253] +
- melcoe[499] * fftmag[254] + melcoe[500] * fftmag[255];
- global_cmvn(dout);
-}
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.h b/funasr/runtime/onnxruntime/src/FeatureExtract.h
deleted file mode 100644
index 8296253..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#ifndef FEATUREEXTRACT_H
-#define FEATUREEXTRACT_H
-
-#include <fftw3.h>
-#include <stdint.h>
-
-#include "FeatureQueue.h"
-#include "SpeechWrap.h"
-#include "Tensor.h"
-
-class FeatureExtract {
- private:
- SpeechWrap speech;
- FeatureQueue fqueue;
- int mode;
- int fft_size = 512;
- int window_size = 400;
- int window_shift = 160;
-
- //void fftw_init();
- void melspect(float *din, float *dout);
- void global_cmvn(float *din);
-
- public:
- FeatureExtract(int mode);
- ~FeatureExtract();
- int size();
- //int status();
- void reset();
- void insert(fftwf_plan plan, float *din, int len, int flag);
- bool fetch(Tensor<float> *&dout);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/FeatureQueue.cpp b/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
deleted file mode 100644
index f07633b..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "precomp.h"
-FeatureQueue::FeatureQueue()
-{
- buff = new Tensor<float>(67, 80);
- window_size = 67;
- buff_idx = 0;
-}
-
-FeatureQueue::~FeatureQueue()
-{
- delete buff;
-}
-
-void FeatureQueue::reinit(int size)
-{
- delete buff;
- buff = new Tensor<float>(size, 80);
- buff_idx = 0;
- window_size = size;
-}
-
-void FeatureQueue::reset()
-{
- buff_idx = 0;
-}
-
-void FeatureQueue::push(float *din, int flag)
-{
- int offset = buff_idx * 80;
- memcpy(buff->buff + offset, din, 80 * sizeof(float));
- buff_idx++;
-
- if (flag == S_END) {
- Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
- memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
- feature_queue.push(tmp);
- buff_idx = 0;
- } else if (buff_idx == window_size) {
- feature_queue.push(buff);
- Tensor<float> *tmp = new Tensor<float>(window_size, 80);
- memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
- 3 * 80 * sizeof(float));
- buff_idx = 3;
- buff = tmp;
- }
-}
-
-Tensor<float> *FeatureQueue::pop()
-{
-
- Tensor<float> *tmp = feature_queue.front();
- feature_queue.pop();
- return tmp;
-}
-
-int FeatureQueue::size()
-{
- return feature_queue.size();
-}
diff --git a/funasr/runtime/onnxruntime/src/FeatureQueue.h b/funasr/runtime/onnxruntime/src/FeatureQueue.h
deleted file mode 100644
index be3360b..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureQueue.h
+++ /dev/null
@@ -1,28 +0,0 @@
-
-#ifndef FEATUREQUEUE_H
-#define FEATUREQUEUE_H
-
-#include "Tensor.h"
-#include <queue>
-#include <stdint.h>
-using namespace std;
-
-
-class FeatureQueue {
- private:
- queue<Tensor<float> *> feature_queue;
- Tensor<float> *buff;
- int buff_idx;
- int window_size;
-
- public:
- FeatureQueue();
- ~FeatureQueue();
- void reinit(int size);
- void reset();
- void push(float *din, int flag);
- Tensor<float> *pop();
- int size();
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/Model.cpp b/funasr/runtime/onnxruntime/src/Model.cpp
deleted file mode 100644
index 7ddb635..0000000
--- a/funasr/runtime/onnxruntime/src/Model.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "precomp.h"
-
-Model *create_model(const char *path, int nThread, bool quantize)
-{
- Model *mm;
-
- mm = new paraformer::ModelImp(path, nThread, quantize);
-
- return mm;
-}
diff --git a/funasr/runtime/onnxruntime/src/SpeechWrap.cpp b/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
deleted file mode 100644
index 60d0a2b..0000000
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "precomp.h"
-
-SpeechWrap::SpeechWrap()
-{
- cache_size = 0;
-}
-
-SpeechWrap::~SpeechWrap()
-{
-}
-
-void SpeechWrap::reset()
-{
- cache_size = 0;
-}
-
-void SpeechWrap::load(float *din, int len)
-{
- in = din;
- in_size = len;
- total_size = cache_size + in_size;
-}
-
-int SpeechWrap::size()
-{
- return total_size;
-}
-
-void SpeechWrap::update(int offset)
-{
- int in_offset = offset - cache_size;
- cache_size = (total_size - offset);
- memcpy(cache, in + in_offset, cache_size * sizeof(float));
-}
-
-float &SpeechWrap::operator[](int i)
-{
- return i < cache_size ? cache[i] : in[i - cache_size];
-}
diff --git a/funasr/runtime/onnxruntime/src/SpeechWrap.h b/funasr/runtime/onnxruntime/src/SpeechWrap.h
deleted file mode 100644
index 5d3ee40..0000000
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.h
+++ /dev/null
@@ -1,26 +0,0 @@
-
-#ifndef SPEECHWRAP_H
-#define SPEECHWRAP_H
-
-#include <stdint.h>
-
-class SpeechWrap {
- private:
- float cache[400];
- int cache_size;
- float *in;
- int in_size;
- int total_size;
- int next_cache_size;
-
- public:
- SpeechWrap();
- ~SpeechWrap();
- void load(float *din, int len);
- void update(int offset);
- void reset();
- int size();
- float &operator[](int i);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/Vocab.h b/funasr/runtime/onnxruntime/src/Vocab.h
deleted file mode 100644
index 023671a..0000000
--- a/funasr/runtime/onnxruntime/src/Vocab.h
+++ /dev/null
@@ -1,25 +0,0 @@
-
-#ifndef VOCAB_H
-#define VOCAB_H
-
-#include <stdint.h>
-#include <string>
-#include <vector>
-using namespace std;
-
-class Vocab {
- private:
- vector<string> vocab;
- bool isChinese(string ch);
- bool isEnglish(string ch);
- void loadVocabFromYaml(const char* filename);
-
- public:
- Vocab(const char *filename);
- ~Vocab();
- int size();
- string vector2string(vector<int> in);
- string vector2stringV2(vector<int> in);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/alignedmem.cpp b/funasr/runtime/onnxruntime/src/alignedmem.cpp
index e174afe..d3e4b82 100644
--- a/funasr/runtime/onnxruntime/src/alignedmem.cpp
+++ b/funasr/runtime/onnxruntime/src/alignedmem.cpp
@@ -1,5 +1,5 @@
#include "precomp.h"
-void *aligned_malloc(size_t alignment, size_t required_bytes)
+void *AlignedMalloc(size_t alignment, size_t required_bytes)
{
void *p1; // original block
void **p2; // aligned block
@@ -12,7 +12,7 @@
return p2;
}
-void aligned_free(void *p)
+void AlignedFree(void *p)
{
free(((void **)p)[-1]);
}
diff --git a/funasr/runtime/onnxruntime/src/alignedmem.h b/funasr/runtime/onnxruntime/src/alignedmem.h
index dca68f4..e2b640a 100644
--- a/funasr/runtime/onnxruntime/src/alignedmem.h
+++ b/funasr/runtime/onnxruntime/src/alignedmem.h
@@ -2,9 +2,7 @@
#ifndef ALIGNEDMEM_H
#define ALIGNEDMEM_H
-
-
-extern void *aligned_malloc(size_t alignment, size_t required_bytes);
-extern void aligned_free(void *p);
+extern void *AlignedMalloc(size_t alignment, size_t required_bytes);
+extern void AlignedFree(void *p);
#endif
diff --git a/funasr/runtime/onnxruntime/src/Audio.cpp b/funasr/runtime/onnxruntime/src/audio.cpp
similarity index 76%
rename from funasr/runtime/onnxruntime/src/Audio.cpp
rename to funasr/runtime/onnxruntime/src/audio.cpp
index 38b6de8..ef48fa1 100644
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/audio.cpp
@@ -6,7 +6,7 @@
#include <fstream>
#include <assert.h>
-#include "Audio.h"
+#include "audio.h"
#include "precomp.h"
using namespace std;
@@ -128,39 +128,30 @@
start = 0;
};
AudioFrame::~AudioFrame(){};
-int AudioFrame::set_start(int val)
+int AudioFrame::SetStart(int val)
{
start = val < 0 ? 0 : val;
return start;
};
-int AudioFrame::set_end(int val, int max_len)
+int AudioFrame::SetEnd(int val)
{
-
- float num_samples = val - start;
- float frame_length = 400;
- float frame_shift = 160;
- float num_new_samples =
- ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;
-
- end = start + num_new_samples;
- len = (int)num_new_samples;
- if (end > max_len)
- printf("frame end > max_len!!!!!!!\n");
+ end = val; // the end position is taken verbatim from val: no rounding and no bounds check
+ len = end - start; // span between the stored start and the new end, so start should be set first
return end;
};
-int AudioFrame::get_start()
+int AudioFrame::GetStart()
{
return start;
};
-int AudioFrame::get_len()
+int AudioFrame::GetLen()
{
return len;
};
-int AudioFrame::disp()
+int AudioFrame::Disp()
{
printf("not imp!!!!\n");
@@ -194,27 +185,27 @@
}
}
-void Audio::disp()
+void Audio::Disp()
{
- printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
+ printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE,
speech_len);
}
-float Audio::get_time_len()
+float Audio::GetTimeLen()
{
- return (float)speech_len / model_sample_rate;
+ return (float)speech_len / MODEL_SAMPLE_RATE;
}
-void Audio::wavResample(int32_t sampling_rate, const float *waveform,
+void Audio::WavResample(int32_t sampling_rate, const float *waveform,
int32_t n)
{
printf(
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
- sampling_rate, static_cast<int32_t>(model_sample_rate));
+ sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE));
float min_freq =
- std::min<int32_t>(sampling_rate, model_sample_rate);
+ std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
@@ -222,7 +213,7 @@
//auto resampler = new LinearResample(
// sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
auto resampler = std::make_unique<LinearResample>(
- sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
+ sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
std::vector<float> samples;
resampler->Resample(waveform, n, true, &samples);
//reset speech_data
@@ -235,7 +226,7 @@
copy(samples.begin(), samples.end(), speech_data);
}
-bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
+bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
{
WaveHeader header;
if (speech_data != NULL) {
@@ -279,8 +270,8 @@
}
//resample
- if(*sampling_rate != model_sample_rate){
- wavResample(*sampling_rate, speech_data, speech_len);
+ if(*sampling_rate != MODEL_SAMPLE_RATE){
+ WavResample(*sampling_rate, speech_data, speech_len);
}
AudioFrame* frame = new AudioFrame(speech_len);
@@ -292,7 +283,7 @@
return false;
}
-bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
+bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
{
WaveHeader header;
if (speech_data != NULL) {
@@ -326,8 +317,8 @@
}
//resample
- if(*sampling_rate != model_sample_rate){
- wavResample(*sampling_rate, speech_data, speech_len);
+ if(*sampling_rate != MODEL_SAMPLE_RATE){
+ WavResample(*sampling_rate, speech_data, speech_len);
}
AudioFrame* frame = new AudioFrame(speech_len);
@@ -339,7 +330,7 @@
return false;
}
-bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
{
if (speech_data != NULL) {
free(speech_data);
@@ -349,7 +340,7 @@
}
offset = 0;
- speech_len = nBufLen / 2;
+ speech_len = n_buf_len / 2;
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
if (speech_buff)
{
@@ -369,8 +360,8 @@
}
//resample
- if(*sampling_rate != model_sample_rate){
- wavResample(*sampling_rate, speech_data, speech_len);
+ if(*sampling_rate != MODEL_SAMPLE_RATE){
+ WavResample(*sampling_rate, speech_data, speech_len);
}
AudioFrame* frame = new AudioFrame(speech_len);
@@ -382,7 +373,7 @@
return false;
}
-bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
{
if (speech_data != NULL) {
free(speech_data);
@@ -397,10 +388,10 @@
if (fp == nullptr)
return false;
fseek(fp, 0, SEEK_END);
- uint32_t nFileLen = ftell(fp);
+ uint32_t n_file_len = ftell(fp);
fseek(fp, 0, SEEK_SET);
- speech_len = (nFileLen) / 2;
+ speech_len = (n_file_len) / 2;
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
if (speech_buff)
{
@@ -420,8 +411,8 @@
}
//resample
- if(*sampling_rate != model_sample_rate){
- wavResample(*sampling_rate, speech_data, speech_len);
+ if(*sampling_rate != MODEL_SAMPLE_RATE){
+ WavResample(*sampling_rate, speech_data, speech_len);
}
AudioFrame* frame = new AudioFrame(speech_len);
@@ -434,7 +425,7 @@
}
-int Audio::fetch_chunck(float *&dout, int len)
+int Audio::FetchChunck(float *&dout, int len)
{
if (offset >= speech_align_len) {
dout = NULL;
@@ -455,14 +446,14 @@
}
}
-int Audio::fetch(float *&dout, int &len, int &flag)
+int Audio::Fetch(float *&dout, int &len, int &flag)
{
if (frame_queue.size() > 0) {
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
- dout = speech_data + frame->get_start();
- len = frame->get_len();
+ dout = speech_data + frame->GetStart();
+ len = frame->GetLen();
delete frame;
flag = S_END;
return 1;
@@ -471,9 +462,8 @@
}
}
-void Audio::padding()
+void Audio::Padding()
{
-
float num_samples = speech_len;
float frame_length = 400;
float frame_shift = 160;
@@ -509,71 +499,27 @@
delete frame;
}
-#define UNTRIGGERED 0
-#define TRIGGERED 1
-
-#define SPEECH_LEN_5S (16000 * 5)
-#define SPEECH_LEN_10S (16000 * 10)
-#define SPEECH_LEN_20S (16000 * 20)
-#define SPEECH_LEN_30S (16000 * 30)
-
-/*
-void Audio::split()
+void Audio::Split(Model* recog_obj)
{
- VadInst *handle = WebRtcVad_Create();
- WebRtcVad_Init(handle);
- WebRtcVad_set_mode(handle, 2);
- int window_size = 10;
- AudioWindow audiowindow(window_size);
- int status = UNTRIGGERED;
- int offset = 0;
- int fs = 16000;
- int step = 480;
-
AudioFrame *frame;
frame = frame_queue.front();
frame_queue.pop();
+ int sp_len = frame->GetLen();
delete frame;
frame = NULL;
- while (offset < speech_len - step) {
- int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step);
- if (status == UNTRIGGERED && audiowindow.put(n) >= window_size - 1) {
- frame = new AudioFrame();
- int start = offset - step * (window_size - 1);
- frame->set_start(start);
- status = TRIGGERED;
- } else if (status == TRIGGERED) {
- int win_weight = audiowindow.put(n);
- int voice_len = (offset - frame->get_start());
- int gap = 0;
- if (voice_len < SPEECH_LEN_5S) {
- offset += step;
- continue;
- } else if (voice_len < SPEECH_LEN_10S) {
- gap = 1;
- } else if (voice_len < SPEECH_LEN_20S) {
- gap = window_size / 5;
- } else {
- gap = window_size / 2;
- }
-
- if (win_weight < gap) {
- status = UNTRIGGERED;
- offset = frame->set_end(offset, speech_align_len);
- frame_queue.push(frame);
- frame = NULL;
- }
- }
- offset += step;
- }
-
- if (frame != NULL) {
- frame->set_end(speech_len, speech_align_len);
+ std::vector<float> pcm_data(speech_data, speech_data+sp_len); // copy the first sp_len samples of speech_data for the VAD call
+ vector<std::vector<int>> vad_segments = recog_obj->VadSeg(pcm_data);
+ int seg_sample = MODEL_SAMPLE_RATE/1000; // samples per millisecond, assuming MODEL_SAMPLE_RATE is in Hz (TODO confirm)
+ for(const vector<int>& segment:vad_segments) // iterate by const reference: no per-segment vector copy
+ {
+ frame = new AudioFrame();
+ int start = segment[0]*seg_sample; // presumably segment = {start_ms, end_ms}; verify against VadSeg
+ int end = segment[1]*seg_sample;
+ frame->SetStart(start);
+ frame->SetEnd(end);
frame_queue.push(frame);
frame = NULL;
}
- WebRtcVad_Free(handle);
}
-*/
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/CommonStruct.h b/funasr/runtime/onnxruntime/src/common-struct.h
similarity index 100%
rename from funasr/runtime/onnxruntime/src/CommonStruct.h
rename to funasr/runtime/onnxruntime/src/common-struct.h
diff --git a/funasr/runtime/onnxruntime/src/commonfunc.h b/funasr/runtime/onnxruntime/src/commonfunc.h
index 5198030..fbbda74 100644
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@@ -1,6 +1,5 @@
#pragma once
-
-
+#include <algorithm>
typedef struct
{
std::string msg;
@@ -11,46 +10,45 @@
#ifdef _WIN32
#include <codecvt>
-
-
-inline std::wstring string2wstring(const std::string& str, const std::string& locale)
+inline std::wstring String2wstring(const std::string& str, const std::string& locale)
{
typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
std::wstring_convert<F> strCnv(new F(locale));
return strCnv.from_bytes(str);
}
-inline std::wstring strToWstr(std::string str) {
+inline std::wstring StrToWstr(std::string str) {
if (str.length() == 0)
return L"";
- return string2wstring(str, "zh-CN");
+ return String2wstring(str, "zh-CN");
}
#endif
-
-
-inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) {
+inline void GetInputName(Ort::Session* session, string& inputName,int nIndex=0) {
size_t numInputNodes = session->GetInputCount();
if (numInputNodes > 0) {
Ort::AllocatorWithDefaultOptions allocator;
{
auto t = session->GetInputNameAllocated(nIndex, allocator);
inputName = t.get();
-
}
}
}
-inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
+inline void GetOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
size_t numOutputNodes = session->GetOutputCount();
if (numOutputNodes > 0) {
Ort::AllocatorWithDefaultOptions allocator;
{
auto t = session->GetOutputNameAllocated(nIndex, allocator);
outputName = t.get();
-
}
}
}
+
+// Returns the zero-based offset, counted from `first`, of the element that
+// std::max_element selects in the half-open range [first, last).
+template <class ForwardIterator>
+inline static size_t Argmax(ForwardIterator first, ForwardIterator last) {
+    const ForwardIterator largest = std::max_element(first, last);
+    return std::distance(first, largest);
+}
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.cpp b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
new file mode 100644
index 0000000..3d66dcd
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@@ -0,0 +1,188 @@
+#include "precomp.h"
+
+// Loads the punctuation model found in sz_model_dir.
+//
+// Configures the session options (thread_num intra-op threads, all graph
+// optimizations, CPU memory arena disabled), creates the ONNX Runtime session
+// from PUNC_MODEL_FILE, caches the names of model inputs 0 and 1 and of
+// output 0, and opens PUNC_YAML_FILE with the tokenizer.
+// A failure to create the session is printed and otherwise swallowed; the
+// object is then left without a session and without cached names.
+CTTransformer::CTTransformer(const char* sz_model_dir, int thread_num)
+:env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options{}
+{
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    session_options.DisableCpuMemArena();
+
+    string strModelPath = PathAppend(sz_model_dir, PUNC_MODEL_FILE);
+    string strYamlPath = PathAppend(sz_model_dir, PUNC_YAML_FILE);
+
+    try{
+#ifdef _WIN32
+        // The wide-string helper is spelled StrToWstr in commonfunc.h.
+        std::wstring detPath = StrToWstr(strModelPath);
+        m_session = std::make_unique<Ort::Session>(env_, detPath.c_str(), session_options);
+#else
+        m_session = std::make_unique<Ort::Session>(env_, strModelPath.c_str(), session_options);
+#endif
+    }
+    catch(const std::exception& e) // by reference, so the derived message is not sliced away
+    {
+        // The message is data; it must never be used as the format string.
+        printf("%s", e.what());
+    }
+
+    // The name queries dereference the session, so skip them when it could
+    // not be created.
+    if (m_session)
+    {
+        // read input names and output names
+        string strName;
+        GetInputName(m_session.get(), strName);
+        m_strInputNames.push_back(strName.c_str());
+        GetInputName(m_session.get(), strName, 1);
+        m_strInputNames.push_back(strName);
+
+        GetOutputName(m_session.get(), strName);
+        m_strOutputNames.push_back(strName);
+
+        // The const char* views stay valid because the string vectors are
+        // not modified after this point.
+        for (auto& item : m_strInputNames)
+            m_szInputNames.push_back(item.c_str());
+        for (auto& item : m_strOutputNames)
+            m_szOutputNames.push_back(item.c_str());
+    }
+
+    m_tokenizer.OpenYaml(strYamlPath.c_str());
+}
+
+// No manual cleanup is performed here; the destructor does nothing beyond
+// destroying the members.
+CTTransformer::~CTTransformer() = default;
+
+// Restores punctuation in sz_input and returns the punctuated text.
+//
+// The input is tokenized and processed in mini-sentences of at most TOKEN_LEN
+// tokens. For every mini-sentence except the last, only the tokens up to the
+// last predicted sentence end are emitted; the rest is carried over and
+// prepended to the next mini-sentence. After the last mini-sentence the
+// result is made to end with a period unless it already ends with a period
+// or a question mark.
+string CTTransformer::AddPunc(const char* sz_input)
+{
+    string strResult;
+    vector<string> strOut;
+    vector<int> InputData;
+    m_tokenizer.Tokenize(sz_input, strOut, InputData);
+
+    int nTotalBatch = ceil((float)InputData.size() / TOKEN_LEN);
+    int nCurBatch = -1;
+    int nSentEnd = -1, nLastCommaIndex = -1;
+    vector<int64_t> RemainIDs;   // token ids carried over to the next mini-sentence
+    vector<string> RemainStr;    // token strings carried over to the next mini-sentence
+    vector<int> NewPunctuation;  // punctuation ids emitted so far
+    vector<string> NewString;    // tokens and punctuation marks emitted so far
+    vector<string> NewSentenceOut;
+    vector<int> NewPuncOut;
+    int nDiff = 0;
+    for (size_t i = 0; i < InputData.size(); i += TOKEN_LEN)
+    {
+        // nDiff trims the slice of the last, possibly shorter, mini-sentence.
+        nDiff = (i + TOKEN_LEN) < InputData.size() ? (0) : (i + TOKEN_LEN - InputData.size());
+        vector<int64_t> InputIDs(InputData.begin() + i, InputData.begin() + i + TOKEN_LEN - nDiff);
+        vector<string> InputStr(strOut.begin() + i, strOut.begin() + i + TOKEN_LEN - nDiff);
+        InputIDs.insert(InputIDs.begin(), RemainIDs.begin(), RemainIDs.end()); // RemainIDs+InputIDs;
+        InputStr.insert(InputStr.begin(), RemainStr.begin(), RemainStr.end()); // RemainStr+InputStr;
+
+        auto Punction = Infer(InputIDs);
+        nCurBatch = i / TOKEN_LEN;
+        if (nCurBatch < nTotalBatch - 1) // not the last mini-sentence
+        {
+            nSentEnd = -1;
+            nLastCommaIndex = -1;
+            // Scan backwards for the last sentence end, remembering the last
+            // comma seen on the way.
+            for (int nIndex = (int)Punction.size() - 2; nIndex > 0; nIndex--)
+            {
+                if (m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(PERIOD_INDEX) || m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(QUESTION_INDEX))
+                {
+                    nSentEnd = nIndex;
+                    break;
+                }
+                if (nLastCommaIndex < 0 && m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(COMMA_INDEX))
+                {
+                    nLastCommaIndex = nIndex;
+                }
+            }
+            // No sentence end but the cache grew too long: cut at the last
+            // comma and turn it into a period.
+            if (nSentEnd < 0 && InputStr.size() > CACHE_POP_TRIGGER_LIMIT && nLastCommaIndex > 0)
+            {
+                nSentEnd = nLastCommaIndex;
+                Punction[nSentEnd] = PERIOD_INDEX;
+            }
+            RemainStr.assign(InputStr.begin() + nSentEnd + 1, InputStr.end());
+            RemainIDs.assign(InputIDs.begin() + nSentEnd + 1, InputIDs.end());
+            InputStr.assign(InputStr.begin(), InputStr.begin() + nSentEnd + 1); // mini-sentence to emit now
+            Punction.assign(Punction.begin(), Punction.begin() + nSentEnd + 1);
+        }
+
+        NewPunctuation.insert(NewPunctuation.end(), Punction.begin(), Punction.end());
+        vector<string> WordWithPunc;
+        for (size_t k = 0; k < InputStr.size(); k++)
+        {
+            // Append a space when this token and the next one both start with
+            // a single-byte (ASCII) character.
+            // NOTE(review): the k > 0 test means the first token never gets
+            // the separator - confirm this is intended.
+            if (k > 0 && !(InputStr[k][0] & 0x80) && (k + 1) < InputStr.size() && !(InputStr[k+1][0] & 0x80))
+            {
+                InputStr[k] = InputStr[k]+ " ";
+            }
+            WordWithPunc.push_back(InputStr[k]);
+
+            // Infer returns fewer entries than tokens when inference failed,
+            // so the index is checked before it is used.
+            if (k < Punction.size() && Punction[k] != NOTPUNC_INDEX)
+            {
+                WordWithPunc.push_back(m_tokenizer.Id2Punc(Punction[k]));
+            }
+        }
+
+        NewString.insert(NewString.end(), WordWithPunc.begin(), WordWithPunc.end()); // new_mini_sentence += "".join(words_with_punc)
+        NewSentenceOut = NewString;
+        NewPuncOut = NewPunctuation;
+        // last mini sentence
+        if (nCurBatch == nTotalBatch - 1 && !NewString.empty())
+        {
+            const string& strLast = NewString.back();
+            if (strLast == m_tokenizer.Id2Punc(COMMA_INDEX) || strLast == m_tokenizer.Id2Punc(DUN_INDEX))
+            {
+                // Replace a trailing comma-like mark with a period.
+                NewSentenceOut.assign(NewString.begin(), NewString.end() - 1);
+                NewSentenceOut.push_back(m_tokenizer.Id2Punc(PERIOD_INDEX));
+                NewPuncOut.assign(NewPunctuation.begin(), NewPunctuation.end() - 1);
+                NewPuncOut.push_back(PERIOD_INDEX);
+            }
+            else if (strLast != m_tokenizer.Id2Punc(PERIOD_INDEX) && strLast != m_tokenizer.Id2Punc(QUESTION_INDEX))
+            {
+                // Neither a period nor a question mark at the end: append a
+                // period. (Testing for equality with both marks at once could
+                // never be true.)
+                NewSentenceOut = NewString;
+                NewSentenceOut.push_back(m_tokenizer.Id2Punc(PERIOD_INDEX));
+                NewPuncOut = NewPunctuation;
+                NewPuncOut.push_back(PERIOD_INDEX);
+            }
+        }
+    }
+    for (auto& item : NewSentenceOut)
+        strResult += item;
+    return strResult;
+}
+
+// Runs the punctuation model on one sequence of token ids and returns one
+// predicted class index per output row of CANDIDATE_NUM scores.
+// The result is empty when there is no session, and may be incomplete when
+// ONNX Runtime throws; the exception message is printed in that case.
+vector<int> CTTransformer::Infer(vector<int64_t> input_data)
+{
+    vector<int> punction;
+    if (!m_session)
+    {
+        // Session creation failed in the constructor; nothing to run.
+        return punction;
+    }
+
+    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+    // First input: the token ids, shaped {1, number of tokens}.
+    std::array<int64_t, 2> input_shape_{ 1, (int64_t)input_data.size()};
+    Ort::Value onnx_input = Ort::Value::CreateTensor<int64_t>(memoryInfo,
+        input_data.data(),
+        input_data.size(),
+        input_shape_.data(),
+        input_shape_.size());
+
+    // Second input: the sequence length as a single int32 value.
+    std::array<int32_t,1> text_lengths{ (int32_t)input_data.size() };
+    std::array<int64_t,1> text_lengths_dim{ 1 };
+    Ort::Value onnx_text_lengths = Ort::Value::CreateTensor(
+        memoryInfo,
+        text_lengths.data(),
+        text_lengths.size() * sizeof(int32_t),
+        text_lengths_dim.data(),
+        text_lengths_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_input));
+    input_onnx.emplace_back(std::move(onnx_text_lengths));
+
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+
+        // Seed with int64_t so the product is accumulated in 64 bits.
+        int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), int64_t{1}, std::multiplies<int64_t>());
+        float * floatData = outputTensor[0].GetTensorMutableData<float>();
+
+        // Argmax takes a half-open range: the end pointer has to be one past
+        // the last candidate, otherwise the last class can never be chosen.
+        // The loop bound also keeps a trailing partial row from being read.
+        for (int64_t i = 0; i + CANDIDATE_NUM <= outputCount; i += CANDIDATE_NUM)
+        {
+            int index = Argmax(floatData + i, floatData + i + CANDIDATE_NUM);
+            punction.push_back(index);
+        }
+    }
+    catch (std::exception const &e)
+    {
+        // The message is data; it must never be used as the format string.
+        printf("%s", e.what());
+    }
+    return punction;
+}
+
+
+
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.h b/funasr/runtime/onnxruntime/src/ct-transformer.h
new file mode 100644
index 0000000..77972c7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.h
@@ -0,0 +1,26 @@
+#pragma once
+
+class CTTransformer {
+/**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
+ * https://arxiv.org/pdf/2003.01309.pdf
+*/
+
+private:
+
+ CTokenizer m_tokenizer; // tokenizes the input text and maps punctuation ids to strings
+ vector<string> m_strInputNames, m_strOutputNames; // own the model input/output names
+ vector<const char*> m_szInputNames; // c_str() views into m_strInputNames, passed to Run
+ vector<const char*> m_szOutputNames; // c_str() views into m_strOutputNames, passed to Run
+
+ std::shared_ptr<Ort::Session> m_session; // created in the constructor
+ Ort::Env env_;
+ Ort::SessionOptions session_options;
+public:
+
+ CTTransformer(const char* sz_model_dir, int thread_num); // loads the model and tokenizer files from sz_model_dir
+ ~CTTransformer();
+ vector<int> Infer(vector<int64_t> input_data); // one predicted punctuation id per output row
+ string AddPunc(const char* sz_input); // returns sz_input with punctuation inserted
+};
diff --git a/funasr/runtime/onnxruntime/src/e2e-vad.h b/funasr/runtime/onnxruntime/src/e2e-vad.h
new file mode 100644
index 0000000..e029dc3
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/e2e-vad.h
@@ -0,0 +1,797 @@
+
+#include <utility>
+#include <vector>
+#include <string>
+#include <map>
+#include <cmath>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <cassert>
+
+
+// State of the segment detector kept in E2EVadModel::vad_state_machine.
+enum class VadStateMachine {
+ kVadInStateStartPointNotDetected = 1,
+ kVadInStateInSpeechSegment = 2,
+ kVadInStateEndPointDetected = 3
+};
+
+// Classification of a single frame; this is what WindowDetector::DetectOneFrame
+// takes as input.
+enum class FrameState {
+ kFrameStateInvalid = -1,
+ kFrameStateSpeech = 1,
+ kFrameStateSil = 0
+};
+
+// final voice/unvoice state per frame
+// (the value returned by WindowDetector::DetectOneFrame)
+enum class AudioChangeState {
+ kChangeStateSpeech2Speech = 0,
+ kChangeStateSpeech2Sil = 1,
+ kChangeStateSil2Sil = 2,
+ kChangeStateSil2Speech = 3,
+ kChangeStateNoBegin = 4,
+ kChangeStateInvalid = 5
+};
+
+// Detection mode; stored as an int in VADXOptions::detect_mode.
+enum class VadDetectMode {
+ kVadSingleUtteranceDetectMode = 0,
+ kVadMutipleUtteranceDetectMode = 1 // "Mutiple" (sic): the identifier is used as spelled
+};
+
+// Plain bag of VAD settings. Every field is public and gets its value from the
+// constructor argument listed in the same position; all arguments have defaults.
+// NOTE(review): the *_time / *_ms names suggest milliseconds - confirm.
+class VADXOptions {
+public:
+ int sample_rate;
+ int detect_mode; // compared against VadDetectMode values cast to int
+ int snr_mode;
+ int max_end_silence_time;
+ int max_start_silence_time;
+ bool do_start_point_detection;
+ bool do_end_point_detection;
+ int window_size_ms;
+ int sil_to_speech_time_thres;
+ int speech_to_sil_time_thres;
+ float speech_2_noise_ratio;
+ int do_extend; // used as a boolean flag
+ int lookback_time_start_point;
+ int lookahead_time_end_point;
+ int max_single_segment_time;
+ int nn_eval_block_size; // overwritten by E2EVadModel::ComputeScores with the chunk's frame count
+ int dcd_block_size;
+ float snr_thres;
+ int noise_frame_num_used_for_snr;
+ float decibel_thres;
+ float speech_noise_thres;
+ float fe_prior_thres;
+ int silence_pdf_num;
+ std::vector<int> sil_pdf_ids; // column indices into a frame's score row
+ float speech_noise_thresh_low;
+ float speech_noise_thresh_high;
+ bool output_frame_probs;
+ int frame_in_ms; // frame shift: multiplied by sample_rate / 1000 to step between frames
+ int frame_length_ms; // frame length: multiplied by sample_rate / 1000 to size one frame
+
+ // Parameter abbreviations follow the field order above (sr = sample_rate,
+ // dm = detect_mode, ...).
+ explicit VADXOptions(
+ int sr = 16000,
+ int dm = static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode),
+ int sm = 0,
+ int mset = 800,
+ int msst = 3000,
+ bool dspd = true,
+ bool depd = true,
+ int wsm = 200,
+ int ststh = 150,
+ int sttsh = 150,
+ float s2nr = 1.0,
+ int de = 1,
+ int lbtps = 200,
+ int latsp = 100,
+ int mss = 15000,
+ int nebs = 8,
+ int dbs = 4,
+ float st = -100.0,
+ int nfnus = 100,
+ float dt = -100.0,
+ float snt = 0.9,
+ float fept = 1e-4,
+ int spn = 1,
+ std::vector<int> spids = {0},
+ float sntl = -0.1,
+ float snth = 0.3,
+ bool ofp = false,
+ int fim = 10,
+ int flm = 25
+ ) :
+ sample_rate(sr),
+ detect_mode(dm),
+ snr_mode(sm),
+ max_end_silence_time(mset),
+ max_start_silence_time(msst),
+ do_start_point_detection(dspd),
+ do_end_point_detection(depd),
+ window_size_ms(wsm),
+ sil_to_speech_time_thres(ststh),
+ speech_to_sil_time_thres(sttsh),
+ speech_2_noise_ratio(s2nr),
+ do_extend(de),
+ lookback_time_start_point(lbtps),
+ lookahead_time_end_point(latsp),
+ max_single_segment_time(mss),
+ nn_eval_block_size(nebs),
+ dcd_block_size(dbs),
+ snr_thres(st),
+ noise_frame_num_used_for_snr(nfnus),
+ decibel_thres(dt),
+ speech_noise_thres(snt),
+ fe_prior_thres(fept),
+ silence_pdf_num(spn),
+ sil_pdf_ids(std::move(spids)),
+ speech_noise_thresh_low(sntl),
+ speech_noise_thresh_high(snth),
+ output_frame_probs(ofp),
+ frame_in_ms(fim),
+ frame_length_ms(flm) {}
+};
+
+// One output segment of the VAD: its start and end (the *_ms fields), a sample
+// buffer, flags telling whether the segment's start and end points have been
+// recorded, and a doa value. A fresh object is in the same state Reset() leaves.
+class E2EVadSpeechBufWithDoa {
+public:
+    int start_ms;
+    int end_ms;
+    std::vector<float> buffer;
+    bool contain_seg_start_point;
+    bool contain_seg_end_point;
+    int doa;
+
+    // Starts out in the cleared state.
+    E2EVadSpeechBufWithDoa() { Reset(); }
+
+    // Puts every field back to zero / false and empties the buffer.
+    void Reset() {
+        buffer.clear();
+        start_ms = end_ms = 0;
+        contain_seg_start_point = contain_seg_end_point = false;
+        doa = 0;
+    }
+};
+
+// Per-frame probability record; every field starts at zero.
+class E2EVadFrameProb {
+public:
+    double noise_prob = 0.0;
+    double speech_prob = 0.0;
+    double score = 0.0;
+    int frame_id = 0;
+    int frm_state = 0;
+
+    // The in-class initializers above supply all initial values.
+    E2EVadFrameProb() {}
+};
+
+// Sliding-window smoother for per-frame speech/silence decisions.
+//
+// Keeps the last win_size_frame frame flags (1 = speech, 0 = silence) in a
+// ring buffer together with their running sum, and switches its remembered
+// state between silence and speech when that sum crosses the frame-count
+// thresholds derived from the constructor arguments.
+class WindowDetector {
+public:
+    int window_size_ms;
+    int sil_to_speech_time;
+    int speech_to_sil_time;
+    int frame_size_ms;
+    int win_size_frame;
+    int win_sum;
+    std::vector<int> win_state;
+    int cur_win_pos;
+    FrameState pre_frame_state;
+    FrameState cur_frame_state;
+    int sil_to_speech_frmcnt_thres;
+    int speech_to_sil_frmcnt_thres;
+    int voice_last_frame_count;
+    int noise_last_frame_count;
+    int hydre_frame_count;
+
+    // Window length and both thresholds are converted to frame counts by
+    // integer division with frame_size_ms.
+    WindowDetector(int window_size_ms, int sil_to_speech_time, int speech_to_sil_time, int frame_size_ms) :
+        window_size_ms(window_size_ms),
+        sil_to_speech_time(sil_to_speech_time),
+        speech_to_sil_time(speech_to_sil_time),
+        frame_size_ms(frame_size_ms),
+        win_size_frame(window_size_ms / frame_size_ms),
+        win_sum(0),
+        win_state(win_size_frame, 0),
+        cur_win_pos(0),
+        pre_frame_state(FrameState::kFrameStateSil),
+        cur_frame_state(FrameState::kFrameStateSil),
+        sil_to_speech_frmcnt_thres(sil_to_speech_time / frame_size_ms),
+        speech_to_sil_frmcnt_thres(speech_to_sil_time / frame_size_ms),
+        voice_last_frame_count(0),
+        noise_last_frame_count(0),
+        hydre_frame_count(0) {}
+
+    // Clears the ring buffer and returns to the silence state; the configured
+    // sizes and thresholds are kept.
+    void Reset() {
+        win_state.assign(win_size_frame, 0);
+        win_sum = 0;
+        cur_win_pos = 0;
+        pre_frame_state = cur_frame_state = FrameState::kFrameStateSil;
+        voice_last_frame_count = noise_last_frame_count = hydre_frame_count = 0;
+    }
+
+    int GetWinSize() {
+        return win_size_frame;
+    }
+
+    // Pushes one frame decision into the window and reports the resulting
+    // transition. Any frameState other than speech or silence leaves the
+    // window untouched and yields kChangeStateInvalid. frame_count is unused.
+    AudioChangeState DetectOneFrame(FrameState frameState, int frame_count) {
+        int speech_flag = 0;
+        switch (frameState) {
+            case FrameState::kFrameStateSpeech:
+                speech_flag = 1;
+                break;
+            case FrameState::kFrameStateSil:
+                speech_flag = 0;
+                break;
+            default:
+                return AudioChangeState::kChangeStateInvalid;
+        }
+
+        // Replace the oldest flag and keep the running sum in step.
+        win_sum += speech_flag - win_state[cur_win_pos];
+        win_state[cur_win_pos] = speech_flag;
+        cur_win_pos = (cur_win_pos + 1) % win_size_frame;
+
+        if (pre_frame_state == FrameState::kFrameStateSil) {
+            if (win_sum >= sil_to_speech_frmcnt_thres) {
+                pre_frame_state = FrameState::kFrameStateSpeech;
+                return AudioChangeState::kChangeStateSil2Speech;
+            }
+            return AudioChangeState::kChangeStateSil2Sil;
+        }
+        if (pre_frame_state == FrameState::kFrameStateSpeech) {
+            if (win_sum <= speech_to_sil_frmcnt_thres) {
+                pre_frame_state = FrameState::kFrameStateSil;
+                return AudioChangeState::kChangeStateSpeech2Sil;
+            }
+            return AudioChangeState::kChangeStateSpeech2Speech;
+        }
+        return AudioChangeState::kChangeStateInvalid;
+    }
+
+    int FrameSizeMs() {
+        return frame_size_ms;
+    }
+};
+
+class E2EVadModel {
+public:
+ // Creates the detector with default options and all detection state in its
+ // initial value.
+ E2EVadModel() {
+     // vad_opts is already a default-constructed VADXOptions and
+     // windows_detector comes from its in-class initializer, so the full
+     // reset produces exactly the initial state.
+     AllResetDetection();
+ }
+
+ // Feeds one chunk of model scores plus the matching waveform into the
+ // detector and returns the segments that became available, each as
+ // {start_ms, end_ms}.
+ //
+ // With online == true a segment may be reported in parts: -1 is written in
+ // place of a start or an end that is not reported by this call.
+ // With is_final == true the last frame of the chunk is handled as the final
+ // frame and all detector state is reset before returning.
+ //
+ // NOTE(review): the speech_noise_thres argument is stored in vad_opts only;
+ // GetFrameState reads the separate speech_noise_thres member, which is
+ // refreshed from vad_opts in AllResetDetection - confirm this is intended.
+ std::vector<std::vector<int>>
+ operator()(const std::vector<std::vector<float>> &score, const std::vector<float> &waveform, bool is_final = false,
+ bool online = false, int max_end_sil = 800, int max_single_segment_time = 15000,
+ float speech_noise_thres = 0.8, int sample_rate = 16000) {
+ max_end_sil_frame_cnt_thresh = max_end_sil - vad_opts.speech_to_sil_time_thres;
+ this->waveform = waveform;
+ this->vad_opts.max_single_segment_time = max_single_segment_time;
+ this->vad_opts.speech_noise_thres = speech_noise_thres;
+ this->vad_opts.sample_rate = sample_rate;
+
+ ComputeDecibel();
+ ComputeScores(score);
+ if (!is_final) {
+ DetectCommonFrames();
+ } else {
+ DetectLastFrames();
+ }
+
+ std::vector<std::vector<int>> segment_batch;
+ if (output_data_buf.size() > 0) {
+ // Only entries at or after output_data_buf_offset are examined.
+ for (size_t i = output_data_buf_offset; i < output_data_buf.size(); i++) {
+ int start_ms;
+ int end_ms;
+ if (online) {
+
+ if (!output_data_buf[i].contain_seg_start_point) {
+ continue;
+ }
+ if (!next_seg && !output_data_buf[i].contain_seg_end_point) {
+ continue;
+ }
+ // The start is reported only when a new segment is expected.
+ start_ms = next_seg ? output_data_buf[i].start_ms : -1;
+
+ if (output_data_buf[i].contain_seg_end_point) {
+ end_ms = output_data_buf[i].end_ms;
+ next_seg = true;
+ output_data_buf_offset += 1;
+ } else {
+ end_ms = -1;
+ next_seg = false;
+ }
+ } else {
+ // Offline: before the final call only segments with both ends known are returned.
+ if (!is_final &&
+ (!output_data_buf[i].contain_seg_start_point || !output_data_buf[i].contain_seg_end_point)) {
+ continue;
+ }
+ start_ms = output_data_buf[i].start_ms;
+ end_ms = output_data_buf[i].end_ms;
+ output_data_buf_offset += 1;
+ }
+ std::vector<int> segment = {start_ms, end_ms};
+ segment_batch.push_back(segment);
+ }
+ }
+
+ if (is_final) {
+ AllResetDetection();
+ }
+ return segment_batch;
+ }
+
+private:
+ VADXOptions vad_opts;
+ WindowDetector windows_detector = WindowDetector(200, 150, 150, 10); // window_size_ms, sil_to_speech_time, speech_to_sil_time, frame_size_ms
+ bool is_final;
+ int data_buf_start_frame; // frame index that data_buf is meant to start at
+ int frm_cnt; // total number of score frames received so far
+ int latest_confirmed_speech_frame;
+ int lastest_confirmed_silence_frame;
+ int continous_silence_frame_count;
+ VadStateMachine vad_state_machine;
+ int confirmed_start_frame; // -1 while no start frame has been confirmed
+ int confirmed_end_frame; // -1 while no end frame has been confirmed
+ int number_end_time_detected;
+ int sil_frame;
+ std::vector<int> sil_pdf_ids; // copy of vad_opts.sil_pdf_ids
+ float noise_average_decibel; // -100.0 until a first noise frame is averaged in
+ bool pre_end_silence_detected;
+ bool next_seg;
+ std::vector<E2EVadSpeechBufWithDoa> output_data_buf; // detected segments
+ int output_data_buf_offset; // count of segments already returned by operator()
+ std::vector<E2EVadFrameProb> frame_probs;
+ int max_end_sil_frame_cnt_thresh;
+ float speech_noise_thres; // threshold read by GetFrameState
+ std::vector<std::vector<float>> scores; // all score frames received so far
+ bool max_time_out;
+ std::vector<float> decibel; // one energy value per frame
+ std::vector<float> data_buf;
+ std::vector<float> data_buf_all; // every waveform sample received so far
+ std::vector<float> waveform; // the chunk passed to the current operator() call
+
+ // Returns the whole model to its freshly constructed state: counters,
+ // buffers and thresholds here, the per-segment detection state through
+ // ResetDetection().
+ void AllResetDetection() {
+     // Flags and counters that ResetDetection() does not touch.
+     is_final = false;
+     max_time_out = false;
+     pre_end_silence_detected = false;
+     next_seg = true;
+     data_buf_start_frame = 0;
+     frm_cnt = 0;
+     number_end_time_detected = 0;
+     output_data_buf_offset = 0;
+     noise_average_decibel = -100.0;
+
+     // Values derived from the options.
+     sil_pdf_ids = vad_opts.sil_pdf_ids;
+     max_end_sil_frame_cnt_thresh = vad_opts.max_end_silence_time - vad_opts.speech_to_sil_time_thres;
+     speech_noise_thres = vad_opts.speech_noise_thres;
+
+     // Accumulated data.
+     output_data_buf.clear();
+     scores.clear();
+     decibel.clear();
+     data_buf.clear();
+     data_buf_all.clear();
+     waveform.clear();
+
+     // Silence/speech frame markers, confirmed start/end frames, the state
+     // machine, the window detector, sil_frame and frame_probs.
+     ResetDetection();
+ }
+
+ // Resets the per-segment detection state so that a new start point can be
+ // searched for; accumulated scores and audio are kept.
+ void ResetDetection() {
+     vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+     windows_detector.Reset();
+
+     confirmed_start_frame = -1;
+     confirmed_end_frame = -1;
+     lastest_confirmed_silence_frame = -1;
+
+     latest_confirmed_speech_frame = 0;
+     continous_silence_frame_count = 0;
+     sil_frame = 0;
+
+     frame_probs.clear();
+ }
+
+ // Appends the current chunk (`waveform`) to data_buf_all and pushes one
+ // energy value, 10 * log10(sum of squared samples + 1e-6), per frame of
+ // that chunk onto `decibel`. Frames are frame_length_ms long and start
+ // every frame_in_ms; a chunk shorter than one frame adds no values.
+ void ComputeDecibel() {
+     const int frame_sample_length = int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000);
+     const int frame_shift_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+     if (data_buf_all.empty()) {
+         data_buf_all = waveform;
+         data_buf = data_buf_all;
+     } else {
+         data_buf_all.insert(data_buf_all.end(), waveform.begin(), waveform.end());
+     }
+
+     // waveform.size() is unsigned, so the frame length has to be compared
+     // before it is subtracted: subtracting first would wrap around for a
+     // chunk shorter than one frame and the loop would read far out of
+     // bounds. A non-positive shift would never advance the loop.
+     if (frame_sample_length <= 0 || frame_shift_length <= 0 ||
+         waveform.size() < static_cast<size_t>(frame_sample_length)) {
+         return;
+     }
+     const size_t last_offset = waveform.size() - static_cast<size_t>(frame_sample_length);
+     for (size_t offset = 0; offset <= last_offset; offset += frame_shift_length) {
+         float sum = 0.0;
+         for (int i = 0; i < frame_sample_length; i++) {
+             sum += waveform[offset + i] * waveform[offset + i];
+         }
+         this->decibel.push_back(10 * log10(sum + 0.000001));
+     }
+ }
+
+ // Records the frame count of the new chunk as nn_eval_block_size, adds it
+ // to frm_cnt and appends the chunk's score rows to the stored scores.
+ void ComputeScores(const std::vector<std::vector<float>> &scores) {
+     const int chunk_frames = scores.size();
+     vad_opts.nn_eval_block_size = chunk_frames;
+     frm_cnt += chunk_frames;
+     // Appending to an empty vector is the same as the first assignment.
+     std::vector<std::vector<float>> &stored = this->scores;
+     stored.insert(stored.end(), scores.begin(), scores.end());
+ }
+
+ // Advances data_buf_start_frame towards frame_idx one frame at a time and,
+ // if it moved at all, rebuilds data_buf as the tail of data_buf_all that
+ // begins at the new start frame. The loop stops early once fewer samples
+ // than one frame shift remain.
+ // NOTE(review): data_length is not checked before resize(); it would be
+ // negative if start_pos ever passed the end of data_buf_all - confirm
+ // callers never ask for a frame beyond the buffered audio.
+ void PopDataBufTillFrame(int frame_idx) {
+ // Despite its name this is the frame shift (frame_in_ms), in samples.
+ int frame_sample_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+ int start_pos=-1; // -1 means the start frame did not move
+ int data_length= data_buf.size();
+ while (data_buf_start_frame < frame_idx) {
+ if (data_length >= frame_sample_length) {
+ data_buf_start_frame += 1;
+ start_pos= data_buf_start_frame* frame_sample_length;
+ data_length=data_buf_all.size()-start_pos;
+ } else {
+ break;
+ }
+ }
+ if (start_pos!=-1){
+ data_buf.resize(data_length);
+ std::copy(data_buf_all.begin() + start_pos, data_buf_all.end(), data_buf.begin());
+ }
+ }
+
+ // Extends the current output segment by frm_cnt frames starting at
+ // start_frm, opening a new segment first when first_frm_is_start_point is
+ // set or no segment exists yet. Updates the segment's end_ms and its
+ // start/end-point flags, and advances data_buf_start_frame by frm_cnt.
+ // Inconsistencies are only reported on std::cout; processing continues.
+ void PopDataToOutputBuf(int start_frm, int frm_cnt, bool first_frm_is_start_point, bool last_frm_is_end_point,
+ bool end_point_is_sent_end) {
+ PopDataBufTillFrame(start_frm);
+ int expected_sample_number = int(frm_cnt * vad_opts.sample_rate * vad_opts.frame_in_ms / 1000);
+ if (last_frm_is_end_point) {
+ // The last frame also covers the part of the frame length that exceeds the frame shift.
+ int extra_sample = std::max(0, int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000 -
+ vad_opts.sample_rate * vad_opts.frame_in_ms / 1000));
+ expected_sample_number += int(extra_sample);
+ }
+ if (end_point_is_sent_end) {
+ expected_sample_number = std::max(expected_sample_number, int(data_buf.size()));
+ }
+ if (data_buf.size() < expected_sample_number) {
+ std::cout << "error in calling pop data_buf\n";
+ }
+ if (output_data_buf.size() == 0 || first_frm_is_start_point) {
+ output_data_buf.push_back(E2EVadSpeechBufWithDoa());
+ output_data_buf[output_data_buf.size() - 1].Reset();
+ output_data_buf[output_data_buf.size() - 1].start_ms = start_frm * vad_opts.frame_in_ms;
+ output_data_buf[output_data_buf.size() - 1].end_ms = output_data_buf[output_data_buf.size() - 1].start_ms;
+ output_data_buf[output_data_buf.size() - 1].doa = 0;
+ }
+ E2EVadSpeechBufWithDoa &cur_seg = output_data_buf.back();
+ if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {
+ std::cout << "warning\n";
+ }
+ int out_pos = (int) cur_seg.buffer.size(); // only incremented below, never read
+ int data_to_pop;
+ if (end_point_is_sent_end) {
+ data_to_pop = expected_sample_number;
+ } else {
+ data_to_pop = int(frm_cnt * vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+ }
+ if (data_to_pop > int(data_buf.size())) {
+ std::cout << "VAD data_to_pop is bigger than data_buf.size()!!!\n";
+ data_to_pop = (int) data_buf.size();
+ expected_sample_number = (int) data_buf.size();
+ }
+ cur_seg.doa = 0;
+ // NOTE(review): both loops append data_buf.back(), i.e. the same last
+ // sample every time, not consecutive samples - confirm whether the segment
+ // buffer content is relied on anywhere.
+ for (int sample_cpy_out = 0; sample_cpy_out < data_to_pop; sample_cpy_out++) {
+ cur_seg.buffer.push_back(data_buf.back());
+ out_pos++;
+ }
+ for (int sample_cpy_out = data_to_pop; sample_cpy_out < expected_sample_number; sample_cpy_out++) {
+ cur_seg.buffer.push_back(data_buf.back());
+ out_pos++;
+ }
+ if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {
+ std::cout << "Something wrong with the VAD algorithm\n";
+ }
+ data_buf_start_frame += frm_cnt;
+ cur_seg.end_ms = (start_frm + frm_cnt) * vad_opts.frame_in_ms;
+ if (first_frm_is_start_point) {
+ cur_seg.contain_seg_start_point = true;
+ }
+ if (last_frm_is_end_point) {
+ cur_seg.contain_seg_end_point = true;
+ }
+ }
+
+ // Records valid_frame as the latest confirmed silence frame and, while no
+ // start point has been detected yet, drops buffered audio before it.
+ void OnSilenceDetected(int valid_frame) {
+     lastest_confirmed_silence_frame = valid_frame;
+     const bool waiting_for_start =
+         vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected;
+     if (!waiting_for_start) {
+         return;
+     }
+     PopDataBufTillFrame(valid_frame);
+ }
+
+ // Records valid_frame as the latest confirmed speech frame and appends
+ // that single frame to the current output segment.
+ void OnVoiceDetected(int valid_frame) {
+     const int single_frame = 1;
+     const bool is_start_point = false;
+     const bool is_end_point = false;
+     const bool is_sentence_end = false;
+     latest_confirmed_speech_frame = valid_frame;
+     PopDataToOutputBuf(valid_frame, single_frame, is_start_point, is_end_point, is_sentence_end);
+ }
+
+ // Confirms start_frame as the start of a segment (unless one is already
+ // confirmed, which is only reported) and, for a real result while the
+ // start point is still undetected, opens the output segment with it.
+ void OnVoiceStart(int start_frame, bool fake_result = false) {
+     if (confirmed_start_frame == -1) {
+         confirmed_start_frame = start_frame;
+     } else {
+         std::cout << "not reset vad properly\n";
+     }
+
+     const bool start_pending =
+         vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected;
+     if (fake_result || !start_pending) {
+         return;
+     }
+     PopDataToOutputBuf(confirmed_start_frame, 1, true, false, false);
+ }
+
+
+ // Flushes the speech frames between the latest confirmed speech frame and
+ // end_frame, confirms end_frame as the segment end (unless one is already
+ // confirmed, which is only reported), closes the output segment for a
+ // real result, and counts the detected end.
+ void OnVoiceEnd(int end_frame, bool fake_result, bool is_last_frame) {
+     int t = latest_confirmed_speech_frame + 1;
+     while (t < end_frame) {
+         OnVoiceDetected(t);
+         ++t;
+     }
+
+     if (confirmed_end_frame == -1) {
+         confirmed_end_frame = end_frame;
+     } else {
+         std::cout << "not reset vad properly\n";
+     }
+
+     if (!fake_result) {
+         sil_frame = 0;
+         PopDataToOutputBuf(confirmed_end_frame, 1, false, true, is_last_frame);
+     }
+     number_end_time_detected += 1;
+ }
+
+ // On the final frame, ends the current segment at cur_frm_idx and moves
+ // the state machine to "end point detected"; otherwise does nothing.
+ void MaybeOnVoiceEndIfLastFrame(bool is_final_frame, int cur_frm_idx) {
+     if (!is_final_frame) {
+         return;
+     }
+     OnVoiceEnd(cur_frm_idx, false, true);
+     vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ }
+
+ // Start-point latency: the latency in frames multiplied by frame_in_ms.
+ int GetLatency() {
+     const int latency_frames = LatencyFrmNumAtStartPoint();
+     return int(latency_frames * vad_opts.frame_in_ms);
+ }
+
+ // Number of frames of latency at a start point: the detector window size,
+ // plus the look-back time converted to frames when do_extend is set.
+ int LatencyFrmNumAtStartPoint() {
+     const int window_frames = windows_detector.GetWinSize();
+     if (!vad_opts.do_extend) {
+         return window_frames;
+     }
+     const int lookback_frames = int(vad_opts.lookback_time_start_point / vad_opts.frame_in_ms);
+     return window_frames + lookback_frames;
+ }
+
+ // Classifies frame t as speech or silence from its energy (decibel[t]) and
+ // its model scores (scores[t]), and updates the running noise-level average
+ // when the scores do not indicate speech.
+ // NOTE(review): decibel[t] and scores[t] are indexed without a bounds
+ // check - this assumes both hold at least t + 1 frames; confirm.
+ FrameState GetFrameState(int t) {
+ FrameState frame_state = FrameState::kFrameStateInvalid;
+ float cur_decibel = decibel[t];
+ float cur_snr = cur_decibel - noise_average_decibel;
+ if (cur_decibel < vad_opts.decibel_thres) {
+ frame_state = FrameState::kFrameStateSil;
+ // NOTE(review): the callers in this class run DetectOneFrame on the
+ // returned state as well, so such a frame is processed twice - confirm.
+ DetectOneFrame(frame_state, t, false);
+ return frame_state;
+ }
+ float sum_score = 0.0;
+ float noise_prob = 0.0;
+ assert(sil_pdf_ids.size() == vad_opts.silence_pdf_num);
+ if (sil_pdf_ids.size() > 0) {
+ // Sum the scores of the silence columns; speech gets the remainder to 1.
+ std::vector<float> sil_pdf_scores;
+ for (auto sil_pdf_id: sil_pdf_ids) {
+ sil_pdf_scores.push_back(scores[t][sil_pdf_id]);
+ }
+ sum_score = accumulate(sil_pdf_scores.begin(), sil_pdf_scores.end(), 0.0);
+ noise_prob = log(sum_score) * vad_opts.speech_2_noise_ratio;
+ float total_score = 1.0;
+ sum_score = total_score - sum_score;
+ }
+ float speech_prob = log(sum_score);
+ if (vad_opts.output_frame_probs) {
+ E2EVadFrameProb frame_prob;
+ frame_prob.noise_prob = noise_prob;
+ frame_prob.speech_prob = speech_prob;
+ frame_prob.score = sum_score;
+ frame_prob.frame_id = t;
+ frame_probs.push_back(frame_prob);
+ }
+ if (exp(speech_prob) >= exp(noise_prob) + speech_noise_thres) {
+ // Scores say speech; it still has to pass the SNR and energy thresholds.
+ if (cur_snr >= vad_opts.snr_thres && cur_decibel >= vad_opts.decibel_thres) {
+ frame_state = FrameState::kFrameStateSpeech;
+ } else {
+ frame_state = FrameState::kFrameStateSil;
+ }
+ } else {
+ frame_state = FrameState::kFrameStateSil;
+ // Below -99.9 the average still holds its initial value: start it from this frame.
+ if (noise_average_decibel < -99.9) {
+ noise_average_decibel = cur_decibel;
+ } else {
+ noise_average_decibel =
+ (cur_decibel + noise_average_decibel * (vad_opts.noise_frame_num_used_for_snr - 1)) /
+ vad_opts.noise_frame_num_used_for_snr;
+ }
+ }
+ return frame_state;
+ }
+
+ // Runs detection over the newest nn_eval_block_size frames, oldest first.
+ // Nothing is processed when an end point was already detected on entry.
+ // Always returns 0.
+ int DetectCommonFrames() {
+     if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+         for (int remaining = vad_opts.nn_eval_block_size; remaining > 0; --remaining) {
+             const int frame_idx = frm_cnt - remaining;
+             const FrameState frame_state = GetFrameState(frame_idx);
+             DetectOneFrame(frame_state, frame_idx, false);
+         }
+     }
+     return 0;
+ }
+
+ // Same as DetectCommonFrames, except that the newest frame is passed on as
+ // the final frame. Always returns 0.
+ int DetectLastFrames() {
+     if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+         for (int remaining = vad_opts.nn_eval_block_size; remaining > 0; --remaining) {
+             const int frame_idx = frm_cnt - remaining;
+             const bool is_newest_frame = (remaining == 1);
+             const FrameState frame_state = GetFrameState(frame_idx);
+             DetectOneFrame(frame_state, frame_idx, is_newest_frame);
+         }
+     }
+     return 0;
+ }
+
+ // Core of the segment state machine. Feeds one frame decision through the
+ // window detector and, depending on the reported transition and the current
+ // vad_state_machine value, starts a segment, extends it, or ends it.
+ // In multiple-utterance mode the detection state is reset as soon as an end
+ // point has been detected, so that the next segment can be found.
+ void DetectOneFrame(FrameState cur_frm_state, int cur_frm_idx, bool is_final_frame) {
+ FrameState tmp_cur_frm_state = FrameState::kFrameStateInvalid;
+ if (cur_frm_state == FrameState::kFrameStateSpeech) {
+ // The compared value is the constant 1.0, so the outcome depends only on fe_prior_thres.
+ if (std::fabs(1.0) > vad_opts.fe_prior_thres) {
+ tmp_cur_frm_state = FrameState::kFrameStateSpeech;
+ } else {
+ tmp_cur_frm_state = FrameState::kFrameStateSil;
+ }
+ } else if (cur_frm_state == FrameState::kFrameStateSil) {
+ tmp_cur_frm_state = FrameState::kFrameStateSil;
+ }
+ AudioChangeState state_change = windows_detector.DetectOneFrame(tmp_cur_frm_state, cur_frm_idx);
+ int frm_shift_in_ms = vad_opts.frame_in_ms;
+ if (AudioChangeState::kChangeStateSil2Speech == state_change) {
+ int silence_frame_count = continous_silence_frame_count; // not used afterwards
+ continous_silence_frame_count = 0;
+ pre_end_silence_detected = false;
+ int start_frame = 0;
+ if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+ // Place the start before the current frame by the start-point latency,
+ // but not before the beginning of the buffered data.
+ start_frame = std::max(data_buf_start_frame, cur_frm_idx - LatencyFrmNumAtStartPoint());
+ OnVoiceStart(start_frame);
+ vad_state_machine = VadStateMachine::kVadInStateInSpeechSegment;
+ for (int t = start_frame + 1; t <= cur_frm_idx; t++) {
+ OnVoiceDetected(t);
+ }
+ } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+ for (int t = latest_confirmed_speech_frame + 1; t < cur_frm_idx; t++) {
+ OnVoiceDetected(t);
+ }
+ // End the segment once it is longer than max_single_segment_time.
+ if (cur_frm_idx - confirmed_start_frame + 1 > vad_opts.max_single_segment_time / frm_shift_in_ms) {
+ OnVoiceEnd(cur_frm_idx, false, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else if (!is_final_frame) {
+ OnVoiceDetected(cur_frm_idx);
+ } else {
+ MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+ }
+ }
+ } else if (AudioChangeState::kChangeStateSpeech2Sil == state_change) {
+ continous_silence_frame_count = 0;
+ if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+ // do nothing
+ } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+ if (cur_frm_idx - confirmed_start_frame + 1 >
+ vad_opts.max_single_segment_time / frm_shift_in_ms) {
+ OnVoiceEnd(cur_frm_idx, false, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else if (!is_final_frame) {
+ OnVoiceDetected(cur_frm_idx);
+ } else {
+ MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+ }
+ }
+ } else if (AudioChangeState::kChangeStateSpeech2Speech == state_change) {
+ continous_silence_frame_count = 0;
+ if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+ if (cur_frm_idx - confirmed_start_frame + 1 >
+ vad_opts.max_single_segment_time / frm_shift_in_ms) {
+ max_time_out = true;
+ OnVoiceEnd(cur_frm_idx, false, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else if (!is_final_frame) {
+ OnVoiceDetected(cur_frm_idx);
+ } else {
+ MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+ }
+ }
+ } else if (AudioChangeState::kChangeStateSil2Sil == state_change) {
+ continous_silence_frame_count += 1;
+ if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+ // Give up waiting for speech: either the leading silence exceeded
+ // max_start_silence_time in single-utterance mode, or this is the
+ // final frame and no end has been detected at all.
+ if ((vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadSingleUtteranceDetectMode) &&
+ (continous_silence_frame_count * frm_shift_in_ms > vad_opts.max_start_silence_time)) ||
+ (is_final_frame && number_end_time_detected == 0)) {
+ for (int t = lastest_confirmed_silence_frame + 1; t < cur_frm_idx; t++) {
+ OnSilenceDetected(t);
+ }
+ // Fake start/end: frames are confirmed but no output segment is written.
+ OnVoiceStart(0, true);
+ OnVoiceEnd(0, true, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else {
+ if (cur_frm_idx >= LatencyFrmNumAtStartPoint()) {
+ OnSilenceDetected(cur_frm_idx - LatencyFrmNumAtStartPoint());
+ }
+ }
+ } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+ // Enough trailing silence: end the segment, looking back from the current frame.
+ if (continous_silence_frame_count * frm_shift_in_ms >= max_end_sil_frame_cnt_thresh) {
+ int lookback_frame = max_end_sil_frame_cnt_thresh / frm_shift_in_ms;
+ if (vad_opts.do_extend) {
+ lookback_frame -= vad_opts.lookahead_time_end_point / frm_shift_in_ms;
+ lookback_frame -= 1;
+ lookback_frame = std::max(0, lookback_frame);
+ }
+ OnVoiceEnd(cur_frm_idx - lookback_frame, false, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else if (cur_frm_idx - confirmed_start_frame + 1 >
+ vad_opts.max_single_segment_time / frm_shift_in_ms) {
+ OnVoiceEnd(cur_frm_idx, false, false);
+ vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+ } else if (vad_opts.do_extend && !is_final_frame) {
+ // Keep extending the segment while the silence run is within the look-ahead.
+ if (continous_silence_frame_count <= vad_opts.lookahead_time_end_point / frm_shift_in_ms) {
+ OnVoiceDetected(cur_frm_idx);
+ }
+ } else {
+ MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+ }
+ }
+ }
+ if (vad_state_machine == VadStateMachine::kVadInStateEndPointDetected &&
+ vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode)) {
+ ResetDetection();
+ }
+ }
+
+};
+
+
+
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.cpp b/funasr/runtime/onnxruntime/src/fsmn-vad.cpp
new file mode 100644
index 0000000..0f87cb2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.cpp
@@ -0,0 +1,273 @@
+
+#include <algorithm>
+#include <cstdio>
+#include <fstream>
+#include "precomp.h"
+//#include "glog/logging.h"
+
+
+/**
+ * Prepares the VAD for inference.
+ *
+ * Configures the ONNX Runtime session options, stores the detection
+ * parameters, loads the model and the CMVN statistics, allocates the model
+ * caches and finally fills in the fbank front-end options.
+ *
+ * @param vad_model              path of the ONNX model file
+ * @param vad_cmvn               path of the CMVN statistics file
+ * @param vad_sample_rate        sample rate, also used as the fbank samp_freq
+ * @param vad_silence_duration   forwarded to the VAD scorer in Infer()
+ *                               (presumably milliseconds - TODO confirm)
+ * @param vad_max_len            forwarded to the VAD scorer in Infer()
+ *                               (presumably milliseconds - TODO confirm)
+ * @param vad_speech_noise_thres forwarded to the VAD scorer in Infer()
+ */
+void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
+                      float vad_speech_noise_thres) {
+    // The session options have to be final before ReadModel() builds the session.
+    session_options_.SetIntraOpNumThreads(1);
+    session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    session_options_.DisableCpuMemArena();
+
+    vad_sample_rate_ = vad_sample_rate;
+    vad_silence_duration_ = vad_silence_duration;
+    vad_max_len_ = vad_max_len;
+    vad_speech_noise_thres_ = vad_speech_noise_thres;
+
+    ReadModel(vad_model);
+    LoadCmvn(vad_cmvn.c_str());
+    InitCache();
+
+    // Front-end: 80-bin fbank, 25 ms hamming window, 10 ms shift, no dither.
+    auto &frame_opts = fbank_opts.frame_opts;
+    auto &mel_opts = fbank_opts.mel_opts;
+    frame_opts.dither = 0;
+    mel_opts.num_bins = 80;
+    frame_opts.samp_freq = vad_sample_rate;
+    frame_opts.window_type = "hamming";
+    frame_opts.frame_shift_ms = 10;
+    frame_opts.frame_length_ms = 25;
+    fbank_opts.energy_floor = 0;
+    mel_opts.debug_mel = false;
+}
+
+/**
+ * Creates the ONNX Runtime session for the VAD model and records the names
+ * of its input and output nodes.
+ *
+ * The process is terminated when the model cannot be loaded. The failure is
+ * reported on stderr and the exit status is non-zero, so that scripts driving
+ * the binary can tell a failed start from a successful run.
+ *
+ * @param vad_model path of the ONNX model file
+ */
+void FsmnVad::ReadModel(const std::string &vad_model) {
+    try {
+        vad_session_ = std::make_shared<Ort::Session>(
+                env_, vad_model.c_str(), session_options_);
+    } catch (std::exception const &e) {
+        fprintf(stderr, "Error when load onnx model %s: %s\n", vad_model.c_str(), e.what());
+        exit(-1);
+    }
+    GetInputOutputInfo(vad_session_, &vad_in_names_, &vad_out_names_);
+}
+
+/**
+ * Collects the input and output node names of an ONNX Runtime session.
+ *
+ * ONNX Runtime hands the names out as allocator-owned strings. Ownership is
+ * deliberately released here so that the raw pointers stored in the vectors
+ * stay valid after this function returns; nothing in this function frees
+ * them again.
+ *
+ * @param session   session to query
+ * @param in_names  resized to the input count and filled with the input names
+ * @param out_names resized to the output count and filled with the output names
+ */
+void FsmnVad::GetInputOutputInfo(
+        const std::shared_ptr<Ort::Session> &session,
+        std::vector<const char *> *in_names, std::vector<const char *> *out_names) {
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    // Input info
+    const size_t num_inputs = session->GetInputCount();
+    in_names->resize(num_inputs);
+    for (size_t i = 0; i < num_inputs; ++i) {
+        (*in_names)[i] = session->GetInputNameAllocated(i, allocator).release();
+    }
+
+    // Output info
+    const size_t num_outputs = session->GetOutputCount();
+    out_names->resize(num_outputs);
+    for (size_t i = 0; i < num_outputs; ++i) {
+        (*out_names)[i] = session->GetOutputNameAllocated(i, allocator).release();
+    }
+}
+
+
+/**
+ * Runs the VAD network on a chunk of features.
+ *
+ * The features are flattened into one {1, frames, dim} tensor and fed to the
+ * session together with the cached state tensors. The first output is copied
+ * frame by frame into out_prob; the remaining outputs are written back into
+ * the caches so that the next call continues from the updated state.
+ *
+ * Nothing is written when chunk_feats is empty, when inference throws, or
+ * when the first output does not have the expected rank.
+ *
+ * @param chunk_feats one feature vector per frame; all frames are assumed to
+ *                    have the length of the first one
+ * @param out_prob    receives one score vector per output frame
+ */
+void FsmnVad::Forward(
+        const std::vector<std::vector<float>> &chunk_feats,
+        std::vector<std::vector<float>> *out_prob) {
+    // chunk_feats[0] is read below, so an empty chunk must stop here.
+    if (chunk_feats.empty() || out_prob == nullptr) {
+        return;
+    }
+    Ort::MemoryInfo memory_info =
+            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+
+    const int64_t num_frames = static_cast<int64_t>(chunk_feats.size());
+    const int64_t feature_dim = static_cast<int64_t>(chunk_feats[0].size());
+
+    // 2. Generate input nodes tensor
+    // vad node { batch,frame number,feature dim }
+    const int64_t vad_feats_shape[3] = {1, num_frames, feature_dim};
+    std::vector<float> vad_feats;
+    vad_feats.reserve(static_cast<size_t>(num_frames * feature_dim));
+    for (const auto &chunk_feat: chunk_feats) {
+        vad_feats.insert(vad_feats.end(), chunk_feat.begin(), chunk_feat.end());
+    }
+
+    // 3. Put nodes into onnx input vector
+    std::vector<Ort::Value> vad_inputs;
+    vad_inputs.emplace_back(Ort::Value::CreateTensor<float>(
+            memory_info, vad_feats.data(), vad_feats.size(), vad_feats_shape, 3));
+    // 4 caches
+    // cache node {batch,128,19,1}
+    const int64_t cache_feats_shape[4] = {1, 128, 19, 1};
+    for (auto &cache: in_cache_) {
+        vad_inputs.emplace_back(Ort::Value::CreateTensor<float>(
+                memory_info, cache.data(), cache.size(), cache_feats_shape, 4));
+    }
+
+    // 4. Onnx infer
+    std::vector<Ort::Value> vad_ort_outputs;
+    try {
+        vad_ort_outputs = vad_session_->Run(
+                Ort::RunOptions{nullptr}, vad_in_names_.data(), vad_inputs.data(),
+                vad_inputs.size(), vad_out_names_.data(), vad_out_names_.size());
+    } catch (std::exception const &e) {
+        fprintf(stderr, "VAD inference failed: %s\n", e.what());
+        return;
+    }
+    if (vad_ort_outputs.empty()) {
+        return;
+    }
+
+    // 5. Change infer result to output shapes
+    const float *logp_data = vad_ort_outputs[0].GetTensorMutableData<float>();
+    const std::vector<int64_t> out_shape =
+            vad_ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
+    if (out_shape.size() < 3 || out_shape[1] < 0 || out_shape[2] < 0) {
+        fprintf(stderr, "VAD inference returned an output of unexpected rank\n");
+        return;
+    }
+    const size_t num_outputs = static_cast<size_t>(out_shape[1]);
+    const size_t output_dim = static_cast<size_t>(out_shape[2]);
+    out_prob->resize(num_outputs);
+    for (size_t i = 0; i < num_outputs; i++) {
+        const float *row = logp_data + i * output_dim;
+        (*out_prob)[i].assign(row, row + output_dim);
+    }
+
+    // The outputs after the first one are the updated caches (each 128*19
+    // floats in the shipped model). Never copy more than either side holds.
+    for (size_t i = 1; i < vad_ort_outputs.size() && i - 1 < in_cache_.size(); i++) {
+        const float *data = vad_ort_outputs[i].GetTensorMutableData<float>();
+        const size_t produced = vad_ort_outputs[i].GetTensorTypeAndShapeInfo().GetElementCount();
+        const size_t count = std::min(produced, in_cache_[i - 1].size());
+        memcpy(in_cache_[i - 1].data(), data, sizeof(float) * count);
+    }
+}
+
+/**
+ * Computes fbank features for a complete waveform.
+ *
+ * @param sample_rate sample rate handed to the fbank computer
+ * @param vad_feats   receives one vector of mel_opts.num_bins values per
+ *                    frame; frames are appended to whatever is already there
+ * @param waves       input samples; an empty vector produces no frames
+ */
+void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                         const std::vector<float> &waves) {
+    // Taking the address of element 0 of an empty vector is undefined.
+    if (waves.empty()) {
+        return;
+    }
+    knf::OnlineFbank fbank(fbank_opts);
+    fbank.AcceptWaveform(sample_rate, waves.data(), waves.size());
+
+    const int32_t frames = fbank.NumFramesReady();
+    const int32_t num_bins = fbank_opts.mel_opts.num_bins;
+    for (int32_t i = 0; i < frames; ++i) {
+        const float *frame = fbank.GetFrame(i);
+        // Build the frame in place instead of copying a temporary vector.
+        vad_feats.emplace_back(frame, frame + num_bins);
+    }
+}
+
+/**
+ * Reads the CMVN statistics used by LfrCmvn().
+ *
+ * The file is scanned for "<AddShift>" and "<Rescale>" lines. The line after
+ * such a marker is expected to start with "<LearnRateCoef>"; its tokens from
+ * index 3 up to, but not including, the last one are parsed as floats and
+ * appended to means_list ("<AddShift>") or vars_list ("<Rescale>").
+ *
+ * Blank lines and a marker that is the last line of the file are skipped.
+ * A file that cannot be opened is reported on stderr and leaves both lists
+ * unchanged.
+ *
+ * @param filename path of the CMVN file
+ */
+void FsmnVad::LoadCmvn(const char *filename)
+{
+    using namespace std;
+    ifstream cmvn_stream(filename);
+    if (!cmvn_stream.is_open()) {
+        fprintf(stderr, "Failed to open cmvn file: %s\n", filename);
+        return;
+    }
+
+    string line;
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        if (line_item.empty()) {
+            continue;  // blank line: there is no token to inspect
+        }
+        const bool is_shift = line_item[0] == "<AddShift>";
+        const bool is_scale = line_item[0] == "<Rescale>";
+        if (!is_shift && !is_scale) {
+            continue;
+        }
+
+        // The values live on the line that follows the marker.
+        if (!getline(cmvn_stream, line)) {
+            break;
+        }
+        istringstream values_stream(line);
+        vector<string> values{istream_iterator<string>{values_stream}, istream_iterator<string>{}};
+        if (values.empty() || values[0] != "<LearnRateCoef>") {
+            continue;
+        }
+
+        vector<float> &target = is_shift ? means_list : vars_list;
+        // "j + 1 < size" cannot wrap around the way "j < size - 1" does.
+        for (size_t j = 3; j + 1 < values.size(); j++) {
+            target.push_back(stof(values[j]));
+        }
+    }
+}
+
+/**
+ * Applies low-frame-rate stacking followed by CMVN, in place.
+ *
+ * The input is padded on the left with (lfr_m - 1) / 2 copies of the first
+ * frame. Output frame i is the concatenation of lfr_m consecutive frames
+ * starting at i * lfr_n; when fewer than lfr_m frames remain, the window is
+ * completed with copies of the last frame. Every output frame is then
+ * normalised as (x + mean) * var using means_list and vars_list.
+ *
+ * @param vad_feats input frames; replaced by the stacked, normalised frames
+ * @param lfr_m     number of frames concatenated per output frame
+ * @param lfr_n     hop between two output frames, in input frames
+ * @return          reference to vad_feats; the input is returned unchanged
+ *                  when it is empty or when lfr_m / lfr_n is not positive
+ */
+std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n) {
+    // vad_feats[0] is read below and lfr_n is a divisor.
+    if (vad_feats.empty() || lfr_m <= 0 || lfr_n <= 0) {
+        return vad_feats;
+    }
+
+    std::vector<std::vector<float>> out_feats;
+    int T = vad_feats.size();
+    const int T_lrf = ceil(1.0 * T / lfr_n);
+
+    // Pad frames at start(copy first frame). The frame is copied first so
+    // that the insertion never reads an element that is being moved.
+    const int left_pad = (lfr_m - 1) / 2;
+    const std::vector<float> first_frame = vad_feats[0];
+    vad_feats.insert(vad_feats.begin(), left_pad, first_frame);
+    T = T + left_pad;
+
+    // Merge lfr_m frames as one,lfr_n frames per window
+    out_feats.reserve(T_lrf);
+    for (int i = 0; i < T_lrf; i++) {
+        const int start = i * lfr_n;
+        const int available = T - start;
+        // A fresh buffer per window: a padded window must not inherit the
+        // content of the padded window before it.
+        std::vector<float> p;
+        if (lfr_m <= available) {
+            for (int j = 0; j < lfr_m; j++) {
+                p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+            }
+        } else {
+            // Fill to lfr_m frames at last window if less than lfr_m frames (copy last frame)
+            for (int j = 0; j < available; j++) {
+                p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+            }
+            const std::vector<float> &last_frame = vad_feats.back();
+            for (int j = 0; j < lfr_m - available; j++) {
+                p.insert(p.end(), last_frame.begin(), last_frame.end());
+            }
+        }
+        out_feats.emplace_back(std::move(p));
+    }
+
+    // Apply cmvn, never indexing past the shortest of the three vectors.
+    const size_t cmvn_dim = std::min(means_list.size(), vars_list.size());
+    for (auto &out_feat: out_feats) {
+        const size_t dim = std::min(cmvn_dim, out_feat.size());
+        for (size_t j = 0; j < dim; j++) {
+            out_feat[j] = (out_feat[j] + means_list[j]) * vars_list[j];
+        }
+    }
+    vad_feats = std::move(out_feats);
+    return vad_feats;
+}
+
+/**
+ * Runs voice activity detection on a complete waveform.
+ *
+ * Pipeline: fbank features, LFR stacking (m = 5, n = 1) with CMVN, network
+ * forward pass, then the E2EVadModel scorer which turns the frame scores
+ * into segments.
+ *
+ * @param waves input samples
+ * @return      the segments produced by the scorer; empty when the input is
+ *              too short to yield a single fbank frame
+ */
+std::vector<std::vector<int>>
+FsmnVad::Infer(const std::vector<float> &waves) {
+    std::vector<std::vector<float>> vad_feats;
+    std::vector<std::vector<float>> vad_probs;
+    FbankKaldi(vad_sample_rate_, vad_feats, waves);
+    // The stacking and the network both read the first frame.
+    if (vad_feats.empty()) {
+        return {};
+    }
+    // LfrCmvn works in place; assigning its returned reference back to
+    // vad_feats would only be a self-assignment.
+    LfrCmvn(vad_feats, 5, 1);
+    Forward(vad_feats, &vad_probs);
+
+    E2EVadModel vad_scorer = E2EVadModel();
+    return vad_scorer(vad_probs, waves, true, false, vad_silence_duration_, vad_max_len_,
+                      vad_speech_noise_thres_, vad_sample_rate_);
+}
+
+/**
+ * (Re)creates the four zero-filled cache buffers of 128 * 19 floats each.
+ *
+ * The buffers are assigned rather than appended, so calling this more than
+ * once (for example through a second InitVad()) still leaves exactly four
+ * caches, which is the number Forward() passes to the session.
+ */
+void FsmnVad::InitCache() {
+    in_cache_.assign(4, std::vector<float>(128 * 19 * 1, 0.0f));
+}
+
+/**
+ * Drops the cached state and re-creates zero-filled caches.
+ */
+void FsmnVad::Reset()
+{
+    // Empty the container first; InitCache() then rebuilds the buffers.
+    in_cache_.clear();
+    InitCache();
+}
+
+// Placeholder: intentionally does nothing.
+void FsmnVad::Test()
+{
+}
+
+// Creates the ONNX Runtime environment (error-level logging, empty log id)
+// and default session options; the model itself is loaded by InitVad().
+FsmnVad::FsmnVad()
+    : env_(ORT_LOGGING_LEVEL_ERROR, ""),
+      session_options_{}
+{
+}
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.h b/funasr/runtime/onnxruntime/src/fsmn-vad.h
new file mode 100644
index 0000000..e8569f9
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.h
@@ -0,0 +1,60 @@
+
+#ifndef VAD_SERVER_FSMNVAD_H
+#define VAD_SERVER_FSMNVAD_H
+
+#include "precomp.h"
+
+/**
+ * FSMN based voice activity detector running on ONNX Runtime.
+ *
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * Deep-FSMN for Large Vocabulary Continuous Speech Recognition
+ * https://arxiv.org/abs/1803.05030
+ *
+ * Usage: construct, call InitVad() once, then Infer() per waveform.
+ */
+class FsmnVad {
+
+public:
+    FsmnVad();
+    void Test();
+    // Loads the model and CMVN file and configures the feature front-end.
+    void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
+                 float vad_speech_noise_thres);
+
+    // Runs detection on a complete waveform and returns the scorer's segments.
+    std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
+    // Re-initialises the model caches.
+    void Reset();
+
+private:
+
+    // Creates the session from the model file and records its node names.
+    void ReadModel(const std::string &vad_model);
+
+    // Fills in_names / out_names with the node names of the session.
+    static void GetInputOutputInfo(
+            const std::shared_ptr<Ort::Session> &session,
+            std::vector<const char *> *in_names, std::vector<const char *> *out_names);
+
+    // Appends the fbank frames of waves to vad_feats.
+    void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                    const std::vector<float> &waves);
+
+    // Stacks lfr_m frames every lfr_n frames and applies CMVN, in place.
+    std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n);
+
+    // Runs the network on chunk_feats and updates the caches.
+    void Forward(
+            const std::vector<std::vector<float>> &chunk_feats,
+            std::vector<std::vector<float>> *out_prob);
+
+    // Reads means_list / vars_list from a CMVN file.
+    void LoadCmvn(const char *filename);
+    // Allocates the zero-filled cache buffers.
+    void InitCache();
+
+    // Members are destroyed in reverse declaration order. The environment and
+    // the session options are declared before the session so that the session
+    // is released first and never outlives the environment it was created from.
+    Ort::Env env_;
+    Ort::SessionOptions session_options_;
+    std::shared_ptr<Ort::Session> vad_session_ = nullptr;
+    std::vector<const char *> vad_in_names_;
+    std::vector<const char *> vad_out_names_;
+    // One buffer per cache input of the model.
+    std::vector<std::vector<float>> in_cache_;
+
+    knf::FbankOptions fbank_opts;
+    // Additive and multiplicative CMVN terms: (x + mean) * var.
+    std::vector<float> means_list;
+    std::vector<float> vars_list;
+    int vad_sample_rate_ = 16000;
+    // presumably milliseconds - TODO confirm against E2EVadModel
+    int vad_silence_duration_ = 800;
+    // presumably milliseconds - TODO confirm against E2EVadModel
+    int vad_max_len_ = 15000;
+    double vad_speech_noise_thres_ = 0.9;
+};
+
+
+#endif //VAD_SERVER_FSMNVAD_H
diff --git a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
new file mode 100644
index 0000000..1d822a0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
@@ -0,0 +1,140 @@
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+#include "libfunasrapi.h"
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <atomic>
+#include <mutex>
+#include <thread>
+using namespace std;
+
+std::atomic<int> index(0);
+std::mutex mtx;
+
+/**
+ * Worker body of the RTF benchmark.
+ *
+ * After one warm-up recognition, the worker repeatedly claims the next
+ * unprocessed entry of wav_list through the shared atomic `index`,
+ * recognises it and accumulates the audio length and the time spent. When no
+ * entries are left, the local totals are merged into the shared totals under
+ * `mtx`: the audio lengths are summed, the time is the maximum over workers.
+ *
+ * @param asr_handle   handle returned by FunASRInit
+ * @param wav_list     paths of the wav files to recognise
+ * @param total_length shared sum of the recognised audio lengths
+ * @param total_time   shared maximum of the per-worker time, in microseconds
+ * @param core_id      worker number (currently unused)
+ */
+void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list,
+            float* total_length, long* total_time, int core_id) {
+
+    // wav_list[0] is used for the warm-up run.
+    if (wav_list.empty()) {
+        return;
+    }
+
+    struct timeval start, end;
+    float n_total_length = 0.0f;
+    long n_total_time = 0;
+
+    // warm up; the result is not needed but still has to be released
+    FUNASR_RESULT warmup_result = FunASRRecogFile(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL);
+    FunASRFreeResult(warmup_result);
+
+    // std::thread::id cannot be passed through printf's "..."; format it once.
+    ostringstream thread_id_stream;
+    thread_id_stream << this_thread::get_id();
+    const string thread_id = thread_id_stream.str();
+
+    const size_t wav_count = wav_list.size();
+    while (true) {
+        // Use the atomic variable to fetch the current index and increment it
+        const int i = index.fetch_add(1);
+        if (i < 0 || static_cast<size_t>(i) >= wav_count) {
+            break;
+        }
+
+        gettimeofday(&start, NULL);
+        FUNASR_RESULT result = FunASRRecogFile(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+
+        const long seconds = (end.tv_sec - start.tv_sec);
+        const long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+        n_total_time += taking_micros;
+
+        if (result) {
+            string msg = FunASRGetResult(result, 0);
+            printf("Thread: %s Result: %s \n", thread_id.c_str(), msg.c_str());
+
+            n_total_length += FunASRGetRetSnippetTime(result);
+            FunASRFreeResult(result);
+        } else {
+            cout << "No return data!";
+        }
+    }
+
+    {
+        lock_guard<mutex> guard(mtx);
+        *total_length += n_total_length;
+        if (*total_time < n_total_time) {
+            *total_time = n_total_time;
+        }
+    }
+}
+
+/**
+ * Real-time-factor benchmark.
+ *
+ * Usage: <prog> /path/to/model_dir /path/to/wav.scp quantize thread_num
+ *
+ * Every non-empty line of wav.scp is read as "<id> <path>". The paths are
+ * recognised by thread_num worker threads sharing one model handle, after
+ * which the total audio length, the processing time, the RTF and the
+ * speed-up are printed.
+ *
+ * Returns 0 on success. A non-zero status is returned when the arguments are
+ * missing, the list cannot be opened or holds no usable entry, or the model
+ * cannot be loaded.
+ */
+int main(int argc, char *argv[])
+{
+
+    if (argc < 5)
+    {
+        printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) thread_num \n", argv[0]);
+        exit(-1);
+    }
+
+    // read wav.scp
+    vector<string> wav_list;
+    ifstream in(argv[2]);
+    if (!in.is_open()) {
+        printf("Failed to open file: %s\n", argv[2]);
+        return -1;
+    }
+    string line;
+    while (getline(in, line))
+    {
+        istringstream iss(line);
+        string column1, column2;
+        iss >> column1 >> column2;
+        // Blank lines and lines without a path column carry no file.
+        if (column2.empty()) {
+            continue;
+        }
+        wav_list.push_back(column2);
+    }
+    in.close();
+    if (wav_list.empty()) {
+        printf("No wav entries found in: %s\n", argv[2]);
+        return -1;
+    }
+
+    // model init
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    // is quantize
+    bool quantize = false;
+    istringstream(argv[3]) >> boolalpha >> quantize;
+    // thread num; at least one worker is needed to process anything
+    int thread_num = atoi(argv[4]);
+    if (thread_num < 1) {
+        printf("Invalid thread_num '%s', using 1.\n", argv[4]);
+        thread_num = 1;
+    }
+
+    FUNASR_HANDLE asr_handle = FunASRInit(argv[1], 1, quantize);
+    if (!asr_handle)
+    {
+        printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
+        exit(-1);
+    }
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long model_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    printf("Model initialization takes %lfs.\n", (double)model_init_micros / 1000000);
+
+    // multi-thread test
+    float total_length = 0.0f;
+    long total_time = 0;
+    std::vector<std::thread> threads;
+    threads.reserve(thread_num);
+
+    for (int i = 0; i < thread_num; i++)
+    {
+        threads.emplace_back(runReg, asr_handle, wav_list, &total_length, &total_time, i);
+    }
+
+    for (auto& worker : threads)
+    {
+        worker.join();
+    }
+
+    printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
+    printf("total_time_comput %ld ms.\n", total_time / 1000);
+    // Both ratios divide by a total that stays 0 when nothing was recognised.
+    if (total_length > 0.0f && total_time > 0) {
+        const double rtf = (double)total_time / (total_length * 1000000);
+        printf("total_rtf %05lf .\n", rtf);
+        printf("speedup %05lf .\n", 1.0 / rtf);
+    } else {
+        printf("total_rtf and speedup are undefined: no audio was recognized.\n");
+    }
+
+    FunASRUninit(asr_handle);
+    return 0;
+}
diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
similarity index 65%
rename from funasr/runtime/onnxruntime/tester/tester.cpp
rename to funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
index 7257603..b0d2e4d 100644
--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
@@ -6,29 +6,29 @@
#endif
#include "libfunasrapi.h"
-
-#include <iostream>
-#include <fstream>
#include <sstream>
using namespace std;
int main(int argc, char *argv[])
{
-
- if (argc < 4)
+ if (argc < 6)
{
- printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) \n", argv[0]);
+ printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) use_vad(true or false) use_punc(true or false)\n", argv[0]);
exit(-1);
}
struct timeval start, end;
gettimeofday(&start, NULL);
- int nThreadNum = 4;
+ int thread_num = 1;
// is quantize
bool quantize = false;
+ bool use_vad = false;
+ bool use_punc = false;
istringstream(argv[3]) >> boolalpha >> quantize;
- FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
+ istringstream(argv[4]) >> boolalpha >> use_vad;
+ istringstream(argv[5]) >> boolalpha >> use_punc;
+ FUNASR_HANDLE asr_hanlde=FunASRInit(argv[1], thread_num, quantize, use_vad, use_punc);
- if (!AsrHanlde)
+ if (!asr_hanlde)
{
printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
exit(-1);
@@ -40,23 +40,21 @@
printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
gettimeofday(&start, NULL);
- float snippet_time = 0.0f;
-
- FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
-
+ FUNASR_RESULT result=FunASRRecogFile(asr_hanlde, argv[2], RASR_NONE, NULL, use_vad, use_punc);
gettimeofday(&end, NULL);
-
- if (Result)
+
+ float snippet_time = 0.0f;
+ if (result)
{
- string msg = FunASRGetResult(Result, 0);
+ string msg = FunASRGetResult(result, 0);
setbuf(stdout, NULL);
printf("Result: %s \n", msg.c_str());
- snippet_time = FunASRGetRetSnippetTime(Result);
- FunASRFreeResult(Result);
+ snippet_time = FunASRGetRetSnippetTime(result);
+ FunASRFreeResult(result);
}
else
{
- cout <<"no return data!";
+ printf("no return data!\n");
}
printf("Audio length %lfs.\n", (double)snippet_time);
@@ -65,7 +63,7 @@
printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
- FunASRUninit(AsrHanlde);
+ FunASRUninit(asr_hanlde);
return 0;
}
diff --git a/funasr/runtime/onnxruntime/src/libfunasrapi.cpp b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
index a2ecf10..10c061e 100644
--- a/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
+++ b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
@@ -4,177 +4,197 @@
extern "C" {
#endif
- // APIs for qmasr
- _FUNASRAPI FUNASR_HANDLE FunASRInit(const char* szModelDir, int nThreadNum, bool quantize)
+ // APIs for funasr
+ _FUNASRAPI FUNASR_HANDLE FunASRInit(const char* sz_model_dir, int thread_num, bool quantize, bool use_vad, bool use_punc)
{
- Model* mm = create_model(szModelDir, nThreadNum, quantize);
+ Model* mm = CreateModel(sz_model_dir, thread_num, quantize, use_vad, use_punc);
return mm;
}
- _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
{
- Model* pRecogObj = (Model*)handle;
- if (!pRecogObj)
+ Model* recog_obj = (Model*)handle;
+ if (!recog_obj)
return nullptr;
int32_t sampling_rate = -1;
Audio audio(1);
- if (!audio.loadwav(szBuf, nLen, &sampling_rate))
+ if (!audio.LoadWav(sz_buf, n_len, &sampling_rate))
return nullptr;
- //audio.split();
+ if(use_vad){
+ audio.Split(recog_obj);
+ }
float* buff;
int len;
int flag=0;
- FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
- pResult->snippet_time = audio.get_time_len();
- int nStep = 0;
- int nTotal = audio.get_queue_size();
- while (audio.fetch(buff, len, flag) > 0) {
- //pRecogObj->reset();
- string msg = pRecogObj->forward(buff, len, flag);
- pResult->msg += msg;
- nStep++;
- if (fnCallback)
- fnCallback(nStep, nTotal);
+ FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+ p_result->snippet_time = audio.GetTimeLen();
+ int n_step = 0;
+ int n_total = audio.GetQueueSize();
+ while (audio.Fetch(buff, len, flag) > 0) {
+ string msg = recog_obj->Forward(buff, len, flag);
+ p_result->msg += msg;
+ n_step++;
+ if (fn_callback)
+ fn_callback(n_step, n_total);
+ }
+ if(use_punc){
+ string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+ p_result->msg = punc_res;
}
- return pResult;
+ return p_result;
}
- _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
{
- Model* pRecogObj = (Model*)handle;
- if (!pRecogObj)
+ Model* recog_obj = (Model*)handle;
+ if (!recog_obj)
return nullptr;
Audio audio(1);
- if (!audio.loadpcmwav(szBuf, nLen, &sampling_rate))
+ if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
return nullptr;
- //audio.split();
+ if(use_vad){
+ audio.Split(recog_obj);
+ }
float* buff;
int len;
int flag = 0;
- FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
- pResult->snippet_time = audio.get_time_len();
- int nStep = 0;
- int nTotal = audio.get_queue_size();
- while (audio.fetch(buff, len, flag) > 0) {
- //pRecogObj->reset();
- string msg = pRecogObj->forward(buff, len, flag);
- pResult->msg += msg;
- nStep++;
- if (fnCallback)
- fnCallback(nStep, nTotal);
+ FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+ p_result->snippet_time = audio.GetTimeLen();
+ int n_step = 0;
+ int n_total = audio.GetQueueSize();
+ while (audio.Fetch(buff, len, flag) > 0) {
+ string msg = recog_obj->Forward(buff, len, flag);
+ p_result->msg += msg;
+ n_step++;
+ if (fn_callback)
+ fn_callback(n_step, n_total);
+ }
+ if(use_punc){
+ string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+ p_result->msg = punc_res;
}
- return pResult;
+ return p_result;
}
- _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
{
- Model* pRecogObj = (Model*)handle;
- if (!pRecogObj)
+ Model* recog_obj = (Model*)handle;
+ if (!recog_obj)
return nullptr;
Audio audio(1);
- if (!audio.loadpcmwav(szFileName, &sampling_rate))
+ if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
return nullptr;
- //audio.split();
+ if(use_vad){
+ audio.Split(recog_obj);
+ }
float* buff;
int len;
int flag = 0;
- FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
- pResult->snippet_time = audio.get_time_len();
- int nStep = 0;
- int nTotal = audio.get_queue_size();
- while (audio.fetch(buff, len, flag) > 0) {
- //pRecogObj->reset();
- string msg = pRecogObj->forward(buff, len, flag);
- pResult->msg += msg;
- nStep++;
- if (fnCallback)
- fnCallback(nStep, nTotal);
+ FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+ p_result->snippet_time = audio.GetTimeLen();
+ int n_step = 0;
+ int n_total = audio.GetQueueSize();
+ while (audio.Fetch(buff, len, flag) > 0) {
+ string msg = recog_obj->Forward(buff, len, flag);
+ p_result->msg += msg;
+ n_step++;
+ if (fn_callback)
+ fn_callback(n_step, n_total);
+ }
+ if(use_punc){
+ string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+ p_result->msg = punc_res;
}
- return pResult;
+ return p_result;
}
- _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+ _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
{
- Model* pRecogObj = (Model*)handle;
- if (!pRecogObj)
+ Model* recog_obj = (Model*)handle;
+ if (!recog_obj)
return nullptr;
int32_t sampling_rate = -1;
Audio audio(1);
- if(!audio.loadwav(szWavfile, &sampling_rate))
+ if(!audio.LoadWav(sz_wavfile, &sampling_rate))
return nullptr;
- //audio.split();
+ if(use_vad){
+ audio.Split(recog_obj);
+ }
float* buff;
int len;
int flag = 0;
- int nStep = 0;
- int nTotal = audio.get_queue_size();
- FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
- pResult->snippet_time = audio.get_time_len();
- while (audio.fetch(buff, len, flag) > 0) {
- //pRecogObj->reset();
- string msg = pRecogObj->forward(buff, len, flag);
- pResult->msg+= msg;
- nStep++;
- if (fnCallback)
- fnCallback(nStep, nTotal);
+ int n_step = 0;
+ int n_total = audio.GetQueueSize();
+ FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+ p_result->snippet_time = audio.GetTimeLen();
+ while (audio.Fetch(buff, len, flag) > 0) {
+ string msg = recog_obj->Forward(buff, len, flag);
+ p_result->msg+= msg;
+ n_step++;
+ if (fn_callback)
+ fn_callback(n_step, n_total);
+ }
+ if(use_punc){
+ string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+ p_result->msg = punc_res;
}
- return pResult;
+ return p_result;
}
- _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT Result)
+ _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
{
- if (!Result)
+ if (!result)
return 0;
return 1;
}
- _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT Result)
+ _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result)
{
- if (!Result)
+ if (!result)
return 0.0f;
- return ((FUNASR_RECOG_RESULT*)Result)->snippet_time;
+ return ((FUNASR_RECOG_RESULT*)result)->snippet_time;
}
- _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT Result,int nIndex)
+ _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index)
{
- FUNASR_RECOG_RESULT * pResult = (FUNASR_RECOG_RESULT*)Result;
- if(!pResult)
+ FUNASR_RECOG_RESULT * p_result = (FUNASR_RECOG_RESULT*)result;
+ if(!p_result)
return nullptr;
- return pResult->msg.c_str();
+ return p_result->msg.c_str();
}
- _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT Result)
+ _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result)
{
- if (Result)
+ if (result)
{
- delete (FUNASR_RECOG_RESULT*)Result;
+ delete (FUNASR_RECOG_RESULT*)result;
}
}
_FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle)
{
- Model* pRecogObj = (Model*)handle;
+ Model* recog_obj = (Model*)handle;
- if (!pRecogObj)
+ if (!recog_obj)
return;
- delete pRecogObj;
+ delete recog_obj;
}
#ifdef __cplusplus
diff --git a/funasr/runtime/onnxruntime/src/model.cpp b/funasr/runtime/onnxruntime/src/model.cpp
new file mode 100644
index 0000000..a582f82
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/model.cpp
@@ -0,0 +1,8 @@
+#include "precomp.h"
+
+// Factory for the recognition model: builds a paraformer::Paraformer with the
+// given arguments and returns it through the Model base pointer. The object
+// is allocated with new and handed over to the caller.
+Model *CreateModel(const char *path, int thread_num, bool quantize, bool use_vad, bool use_punc)
+{
+    return new paraformer::Paraformer(path, thread_num, quantize, use_vad, use_punc);
+}
diff --git a/funasr/runtime/onnxruntime/src/online-feature.cpp b/funasr/runtime/onnxruntime/src/online-feature.cpp
new file mode 100644
index 0000000..36e2770
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/online-feature.cpp
@@ -0,0 +1,129 @@
+
+#include "online-feature.h"
+#include <utility>
+
+// Stores the configuration and derives the frame geometry in samples:
+// a 25 ms window and a 10 ms shift, based on whole samples per millisecond.
+OnlineFeature::OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m, int lfr_n,
+                             std::vector<std::vector<float>> cmvns)
+        : sample_rate_(sample_rate),
+          fbank_opts_(std::move(fbank_opts)),
+          lfr_m_(lfr_m),
+          lfr_n_(lfr_n),
+          cmvns_(std::move(cmvns)) {
+    // Integer division on purpose: it matches the in-class default initialisers.
+    const int samples_per_ms = sample_rate_ / 1000;
+    frame_sample_length_ = samples_per_ms * 25;
+    frame_shift_sample_length_ = samples_per_ms * 10;
+}
+
+// Streaming feature extraction for one chunk of audio.
+//
+// Runs OnlineFbank() on the chunk, which appends the new fbank frames to
+// vad_feats, and then maintains the waveform and LFR caches. When enough
+// frames are available, vad_feats is replaced by LFR-stacked, CMVN-normalised
+// frames; otherwise the frames are only stored in lfr_splice_cache_.
+//
+// vad_feats      in/out: receives the features produced for this chunk
+// waves          samples of this chunk; passed by value, so the edits made to
+//                it here are not visible to the caller
+// input_finished true when this is the last chunk; stored in input_finished_
+void OnlineFeature::ExtractFeats(vector<std::vector<float>> &vad_feats,
+ vector<float> waves, bool input_finished) {
+ input_finished_ = input_finished;
+ // Prepends input_cache_ to waves, trims waves to the samples covered by
+ // whole frames and appends one fbank frame per window to vad_feats.
+ OnlineFbank(vad_feats, waves);
+ // cache deal & online lfr,cmvn
+ if (vad_feats.size() > 0) {
+ // Put the samples kept from the previous call in front of this chunk.
+ if (!reserve_waveforms_.empty()) {
+ waves.insert(waves.begin(), reserve_waveforms_.begin(), reserve_waveforms_.end());
+ }
+ // First frames of a stream: seed the LFR cache with (lfr_m_ - 1) / 2
+ // copies of the first frame (left padding).
+ if (lfr_splice_cache_.empty()) {
+ for (int i = 0; i < (lfr_m_ - 1) / 2; i++) {
+ lfr_splice_cache_.emplace_back(vad_feats[0]);
+ }
+ }
+ // Enough frames for at least one LFR window.
+ if (vad_feats.size() + lfr_splice_cache_.size() >= lfr_m_) {
+ vad_feats.insert(vad_feats.begin(), lfr_splice_cache_.begin(), lfr_splice_cache_.end());
+ // Number of whole frames contained in waves.
+ int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
+ // The padding frames have no samples behind them, so they are
+ // subtracted when nothing was reserved from an earlier call.
+ int minus_frame = reserve_waveforms_.empty() ? (lfr_m_ - 1) / 2 : 0;
+ // Replaces vad_feats with the stacked frames and returns the index of
+ // the first frame that was kept in lfr_splice_cache_.
+ int lfr_splice_frame_idxs = OnlineLfrCmvn(vad_feats);
+ int reserve_frame_idx = lfr_splice_frame_idxs - minus_frame;
+ // Keep the samples from frame reserve_frame_idx up to the end of the
+ // last whole frame shift.
+ // NOTE(review): nothing here checks that reserve_frame_idx lies within
+ // [0, frame_from_waves] - confirm that the iterators cannot leave waves.
+ reserve_waveforms_.clear();
+ reserve_waveforms_.insert(reserve_waveforms_.begin(),
+ waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
+ waves.begin() + frame_from_waves * frame_shift_sample_length_);
+ int sample_length = (frame_from_waves - 1) * frame_shift_sample_length_ + frame_sample_length_;
+ // Only trims the local copy of waves.
+ waves.erase(waves.begin() + sample_length, waves.end());
+ } else {
+ // Not enough frames yet: keep the samples after the first
+ // (frame length - frame shift) ones and stash the frames.
+ reserve_waveforms_.clear();
+ reserve_waveforms_.insert(reserve_waveforms_.begin(),
+ waves.begin() + frame_sample_length_ - frame_shift_sample_length_, waves.end());
+ lfr_splice_cache_.insert(lfr_splice_cache_.end(), vad_feats.begin(), vad_feats.end());
+ }
+
+ } else {
+ // No new frame in this chunk: at end of input, flush the cached frames
+ // through the LFR/CMVN step and reset all caches.
+ if (input_finished_) {
+ if (!reserve_waveforms_.empty()) {
+ waves = reserve_waveforms_;
+ }
+ vad_feats = lfr_splice_cache_;
+ OnlineLfrCmvn(vad_feats);
+ ResetCache();
+ }
+ }
+
+}
+
+/**
+ * Streaming LFR stacking followed by CMVN.
+ *
+ * Output frame i is the concatenation of lfr_m_ consecutive input frames
+ * starting at i * lfr_n_. When fewer than lfr_m_ frames remain, the window is
+ * completed with copies of the last frame if the input is finished; otherwise
+ * stacking stops there. The input frames from the returned index onwards are
+ * stored in lfr_splice_cache_ for the next call. Every output frame is
+ * normalised as (x + cmvns_[0]) * cmvns_[1].
+ *
+ * @param vad_feats input frames; replaced by the stacked, normalised frames
+ * @return          index of the first input frame kept in lfr_splice_cache_
+ *                  (0 for empty input, in which case the cache is emptied)
+ */
+int OnlineFeature::OnlineLfrCmvn(vector<vector<float>> &vad_feats) {
+    vector<vector<float>> out_feats;
+    const int T = vad_feats.size();
+    if (T == 0) {
+        // Nothing to stack; begin() + (T - 1) below would be out of range.
+        lfr_splice_cache_.clear();
+        return 0;
+    }
+    // NOTE(review): the division is done on ints, so ceil() has nothing left
+    // to round up - confirm whether rounding up was intended.
+    const int T_lrf = ceil((T - (lfr_m_ - 1) / 2) / lfr_n_);
+    int lfr_splice_frame_idxs = T_lrf;
+    for (int i = 0; i < T_lrf; i++) {
+        const int start = i * lfr_n_;
+        const int available = T - start;
+        // A fresh buffer per window: a padded window must not inherit the
+        // content of the padded window before it.
+        vector<float> p;
+        if (lfr_m_ <= available) {
+            for (int j = 0; j < lfr_m_; j++) {
+                p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+            }
+        } else if (input_finished_) {
+            for (int j = 0; j < available; j++) {
+                p.insert(p.end(), vad_feats[start + j].begin(), vad_feats[start + j].end());
+            }
+            const vector<float> &last_frame = vad_feats.back();
+            for (int j = 0; j < lfr_m_ - available; j++) {
+                p.insert(p.end(), last_frame.begin(), last_frame.end());
+            }
+        } else {
+            // More audio is expected: leave the incomplete window for later.
+            lfr_splice_frame_idxs = i;
+            break;
+        }
+        out_feats.emplace_back(std::move(p));
+    }
+    // Clamp to [0, T - 1] so the iterator below always stays inside vad_feats.
+    lfr_splice_frame_idxs = std::max(0, std::min(T - 1, lfr_splice_frame_idxs * lfr_n_));
+    lfr_splice_cache_.clear();
+    lfr_splice_cache_.insert(lfr_splice_cache_.begin(), vad_feats.begin() + lfr_splice_frame_idxs, vad_feats.end());
+
+    // Apply cmvn, never indexing past the shortest of the vectors involved.
+    if (cmvns_.size() >= 2) {
+        const size_t cmvn_dim = std::min(cmvns_[0].size(), cmvns_[1].size());
+        for (auto &out_feat: out_feats) {
+            const size_t dim = std::min(cmvn_dim, out_feat.size());
+            for (size_t j = 0; j < dim; j++) {
+                out_feat[j] = (out_feat[j] + cmvns_[0][j]) * cmvns_[1][j];
+            }
+        }
+    }
+    vad_feats = std::move(out_feats);
+    return lfr_splice_frame_idxs;
+}
+
+// Computes the fbank frames of one chunk.
+//
+// The samples left over from the previous call are put in front of waves.
+// Everything from the last whole frame shift onwards is saved in input_cache_
+// for the next call, waves is trimmed to the samples covered by whole frames,
+// and one vector of mel_opts.num_bins values per frame is appended to
+// vad_feats. When the chunk is too short for a single frame, waves keeps the
+// merged samples and vad_feats is left untouched.
+void OnlineFeature::OnlineFbank(vector<std::vector<float>> &vad_feats,
+                                vector<float> &waves) {
+
+    knf::OnlineFbank fbank_computer(fbank_opts_);
+
+    // cache merge
+    waves.insert(waves.begin(), input_cache_.begin(), input_cache_.end());
+    const int num_frames = ComputeFrameNum(waves.size(), frame_sample_length_, frame_shift_sample_length_);
+
+    // Send the audio after the last frame shift position to the cache
+    const int consumed_samples = num_frames * frame_shift_sample_length_;
+    input_cache_.assign(waves.begin() + consumed_samples, waves.end());
+    if (num_frames == 0) {
+        return;
+    }
+
+    // Delete audio that haven't undergone fbank processing
+    const int covered_samples = (num_frames - 1) * frame_shift_sample_length_ + frame_sample_length_;
+    waves.erase(waves.begin() + covered_samples, waves.end());
+
+    fbank_computer.AcceptWaveform(sample_rate_, waves.data(), waves.size());
+    const int32_t ready = fbank_computer.NumFramesReady();
+    const int32_t num_bins = fbank_opts_.mel_opts.num_bins;
+    for (int32_t idx = 0; idx != ready; ++idx) {
+        const float *frame = fbank_computer.GetFrame(idx);
+        vad_feats.emplace_back(frame, frame + num_bins);
+    }
+}
diff --git a/funasr/runtime/onnxruntime/src/online-feature.h b/funasr/runtime/onnxruntime/src/online-feature.h
new file mode 100644
index 0000000..78245de
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/online-feature.h
@@ -0,0 +1,51 @@
+#pragma once
+#include <vector>
+#include "precomp.h"
+
+using namespace std;
+// Feature front end that carries waveform and LFR caches from one call to the next.
+class OnlineFeature {
+
+public:
+    OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m_, int lfr_n_,
+                  std::vector<std::vector<float>> cmvns_);
+    // Implemented in online-feature.cpp; `waves` is taken by value.
+    void ExtractFeats(vector<vector<float>> &vad_feats, vector<float> waves, bool input_finished);
+
+private:
+    void OnlineFbank(vector<vector<float>> &vad_feats, vector<float> &waves);
+    int OnlineLfrCmvn(vector<vector<float>> &vad_feats);
+    // Number of complete frames contained in sample_length samples; 0 when shorter than one frame.
+    static int ComputeFrameNum(int sample_length, int frame_sample_length, int frame_shift_sample_length) {
+        int frame_num = static_cast<int>((sample_length - frame_sample_length) / frame_shift_sample_length + 1);
+        if (frame_num >= 1 && sample_length >= frame_sample_length)
+            return frame_num;
+        else
+            return 0;
+    }
+    // Empties all three caches and clears the input-finished flag.
+    void ResetCache() {
+        reserve_waveforms_.clear();
+        input_cache_.clear();
+        lfr_splice_cache_.clear();
+        input_finished_ = false;
+
+    }
+
+    knf::FbankOptions fbank_opts_;
+    // The reserved waveforms by fbank
+    std::vector<float> reserve_waveforms_;
+    // Samples after the last frame-shift position, kept for the next call
+    std::vector<float> input_cache_;
+    // Frames kept back by the LFR splice step for the next call
+    std::vector<std::vector<float>> lfr_splice_cache_;
+    std::vector<std::vector<float>> cmvns_;
+
+    int sample_rate_ = 16000;
+    int frame_sample_length_ = sample_rate_ / 1000 * 25;       // NOTE(review): derived from the default rate; confirm the constructor recomputes it
+    int frame_shift_sample_length_ = sample_rate_ / 1000 * 10; // NOTE(review): same as above
+    int lfr_m_;
+    int lfr_n_;
+    bool input_finished_ = false;
+
+};
diff --git a/funasr/runtime/onnxruntime/src/paraformer.cpp b/funasr/runtime/onnxruntime/src/paraformer.cpp
new file mode 100644
index 0000000..72127f8
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/paraformer.cpp
@@ -0,0 +1,262 @@
+#include "precomp.h"
+
+using namespace std;
+using namespace paraformer;
+
+Paraformer::Paraformer(const char* path,int thread_num, bool quantize, bool use_vad, bool use_punc)
+:env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),session_options{}{
+    string model_path;
+    string cmvn_path;
+    string config_path;
+    // All model files are looked up inside `path`; the VAD and punctuation stages are optional.
+    // VAD model
+    if(use_vad){
+        string vad_path = PathAppend(path, "vad_model.onnx");
+        string mvn_path = PathAppend(path, "vad.mvn");
+        vad_handle = make_unique<FsmnVad>();
+        vad_handle->InitVad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_MAX_LEN, VAD_SILENCE_DYRATION, VAD_SPEECH_NOISE_THRES);
+    }
+
+    // PUNC model
+    if(use_punc){
+        punc_handle = make_unique<CTTransformer>(path, thread_num);
+    }
+
+    if(quantize)
+    {
+        model_path = PathAppend(path, "model_quant.onnx");
+    }else{
+        model_path = PathAppend(path, "model.onnx");
+    }
+    cmvn_path = PathAppend(path, "am.mvn");
+    config_path = PathAppend(path, "config.yaml");
+
+    // knf options
+    fbank_opts.frame_opts.dither = 0;
+    fbank_opts.mel_opts.num_bins = 80;
+    fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
+    fbank_opts.frame_opts.window_type = "hamming";
+    fbank_opts.frame_opts.frame_shift_ms = 10;
+    fbank_opts.frame_opts.frame_length_ms = 25;
+    fbank_opts.energy_floor = 0;
+    fbank_opts.mel_opts.debug_mel = false;
+    // The extractor itself is built per call in FbankKaldi from these options.
+
+    // Only the intra-op thread count is configured; inter-op threading keeps its default.
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    // DisableCpuMemArena can improve performance
+    session_options.DisableCpuMemArena();
+
+#ifdef _WIN32
+    wstring wstrPath = strToWstr(model_path);  // ONNX Runtime takes a wide-character model path on Windows
+    m_session = std::make_unique<Ort::Session>(env_, wstrPath.c_str(), session_options);
+#else
+    m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
+#endif
+
+    string strName;
+    GetInputName(m_session.get(), strName);
+    m_strInputNames.push_back(strName);
+    GetInputName(m_session.get(), strName,1);
+    m_strInputNames.push_back(strName);
+
+    GetOutputName(m_session.get(), strName);
+    m_strOutputNames.push_back(strName);
+    GetOutputName(m_session.get(), strName,1);
+    m_strOutputNames.push_back(strName);
+    // The const char* arrays handed to Run() point into the string vectors filled above.
+    for (auto& item : m_strInputNames)
+        m_szInputNames.push_back(item.c_str());
+    for (auto& item : m_strOutputNames)
+        m_szOutputNames.push_back(item.c_str());
+    vocab = new Vocab(config_path.c_str());
+    LoadCmvn(cmvn_path.c_str());
+}
+
+Paraformer::~Paraformer()
+{
+    // vocab is the raw pointer allocated in the constructor; deleting a null pointer is a no-op.
+    delete vocab;
+}
+
+void Paraformer::Reset()
+{ // currently a no-op: the body is empty
+}
+
+vector<std::vector<int>> Paraformer::VadSeg(std::vector<float>& pcm_data){
+ return vad_handle->Infer(pcm_data); // vad_handle is only created when the constructor got use_vad == true; it is not checked here
+}
+
+string Paraformer::AddPunc(const char* sz_input){
+ return punc_handle->AddPunc(sz_input); // punc_handle is only created when the constructor got use_punc == true; it is not checked here
+}
+
+vector<float> Paraformer::FbankKaldi(float sample_rate, const float* waves, int len) {
+    // A fresh extractor per call: feed the whole buffer, then flatten every ready frame.
+    knf::OnlineFbank extractor(fbank_opts);
+    extractor.AcceptWaveform(sample_rate, waves, len);
+
+    const int32_t ready = extractor.NumFramesReady();
+    const int32_t dim = fbank_opts.mel_opts.num_bins;
+
+    // Row-major layout: frame idx occupies [idx * dim, (idx + 1) * dim).
+    vector<float> features(ready * dim);
+    for (int32_t idx = 0; idx < ready; ++idx) {
+        const float *row = extractor.GetFrame(idx);
+        std::copy(row, row + dim, features.begin() + idx * dim);
+    }
+
+    return features;
+}
+
+void Paraformer::LoadCmvn(const char *filename)
+{
+    // Values come from the "<LearnRateCoef> ..." line that follows an "<AddShift>" line
+    // (-> means_list) or a "<Rescale>" line (-> vars_list, each multiplied by scale).
+    ifstream cmvn_stream(filename);
+    string line;
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        if (line_item.empty()) continue;  // blank line: there is no first token to inspect
+        if (line_item[0] == "<AddShift>") {
+            getline(cmvn_stream, line);
+            istringstream means_lines_stream(line);
+            vector<string> means_lines{istream_iterator<string>{means_lines_stream}, istream_iterator<string>{}};
+            if (!means_lines.empty() && means_lines[0] == "<LearnRateCoef>") {
+                for (size_t j = 3; j + 1 < means_lines.size(); j++) {  // skip 3 leading tokens and the last one
+                    means_list.push_back(stof(means_lines[j]));
+                }
+            }
+        }
+        else if (line_item[0] == "<Rescale>") {
+            getline(cmvn_stream, line);
+            istringstream vars_lines_stream(line);
+            vector<string> vars_lines{istream_iterator<string>{vars_lines_stream}, istream_iterator<string>{}};
+            if (!vars_lines.empty() && vars_lines[0] == "<LearnRateCoef>") {
+                for (size_t j = 3; j + 1 < vars_lines.size(); j++) {  // same token layout as the means line
+                    vars_list.push_back(stof(vars_lines[j])*scale);
+                }
+            }
+        }
+    }
+}
+
+string Paraformer::GreedySearch(float * in, int n_len, int64_t token_nums)
+{
+    // For each of the n_len steps, pick the arg-max over that step's token_nums scores.
+    vector<int> best_ids;
+    float *row = in;
+    for (int step = 0; step < n_len; ++step, row += token_nums) {
+        float top_score;
+        int top_id;
+        FindMax(row, token_nums, top_score, top_id);
+        best_ids.push_back(top_id);
+    }
+    return vocab->Vector2StringV2(best_ids);
+}
+
+vector<float> Paraformer::ApplyLfr(const std::vector<float> &in)
+{
+    // Stacks lfr_window_size consecutive input frames into one output frame, hopping lfr_window_shift frames.
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    int32_t in_num_frames = in.size() / in_feat_dim;
+    // Fewer frames than one window: emit nothing instead of copying past the end of `in`.
+    int32_t out_num_frames = in_num_frames < lfr_window_size
+        ? 0 : (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
+    int32_t out_feat_dim = in_feat_dim * lfr_window_size;
+    std::vector<float> out(out_num_frames * out_feat_dim);
+
+    const float *p_in = in.data();
+    float *p_out = out.data();
+
+    for (int32_t i = 0; i != out_num_frames; ++i) {
+        std::copy(p_in, p_in + out_feat_dim, p_out);
+        p_out += out_feat_dim;
+        p_in += lfr_window_shift * in_feat_dim;
+    }
+
+    return out;
+}
+
+void Paraformer::ApplyCmvn(std::vector<float> *v)
+{
+    // In place, for every frame of means_list.size() values: x[k] = (x[k] + means_list[k]) * vars_list[k].
+    int32_t dim = means_list.size();
+    // No usable statistics (nothing loaded, or the two lists differ in length): leave v untouched.
+    if (dim == 0 || vars_list.size() != means_list.size()) return;
+    int32_t num_frames = v->size() / dim;
+    float *p = v->data();
+
+    for (int32_t i = 0; i != num_frames; ++i) {
+        for (int32_t k = 0; k != dim; ++k) {
+            p[k] = (p[k] + means_list[k]) * vars_list[k];
+        }
+        p += dim;
+    }
+}
+
+string Paraformer::Forward(float* din, int len, int flag)
+{
+    // Pipeline: fbank -> LFR stacking -> CMVN -> ONNX session -> greedy decode. `flag` is not used here.
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len);
+    wav_feats = ApplyLfr(wav_feats);
+    ApplyCmvn(&wav_feats);
+    if (wav_feats.empty()) return "";  // no complete frame to decode; same result as the error path below
+    int32_t feat_dim = lfr_window_size*in_feat_dim;
+    int32_t num_frames = wav_feats.size() / feat_dim;
+
+#ifdef _WIN_X86
+    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+#else
+    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+#endif
+
+    const int64_t input_shape_[3] = {1, num_frames, feat_dim};
+    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
+        wav_feats.data(),
+        wav_feats.size(),
+        input_shape_,
+        3);
+
+    const int64_t paraformer_length_shape[1] = {1};
+    std::vector<int32_t> paraformer_length;
+    paraformer_length.emplace_back(num_frames);
+    Ort::Value onnx_feats_len = Ort::Value::CreateTensor<int32_t>(
+        m_memoryInfo, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1);
+
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_feats));
+    input_onnx.emplace_back(std::move(onnx_feats_len));
+
+    string result;
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+
+        // Output 0 supplies the scores (its last dimension is the token count); output 1's first element is the step count.
+        float* floatData = outputTensor[0].GetTensorMutableData<float>();
+        auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
+        result = GreedySearch(floatData, *encoder_out_lens, outputShape[2]);
+    }
+    catch (std::exception const &e)
+    {
+        printf("%s", e.what());  // never pass the message itself as the format string
+    }
+
+    return result;
+}
+
+string Paraformer::ForwardChunk(float* din, int len, int flag)
+{
+ // Placeholder: prints a notice and returns a fixed string; none of the arguments is used.
+ printf("Not Imp!!!!!!\n");
+ return "Hello";
+}
+
+string Paraformer::Rescoring()
+{ // Placeholder: prints a notice and returns a fixed string.
+ printf("Not Imp!!!!!!\n");
+ return "Hello";
+}
diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
new file mode 100644
index 0000000..5301932
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -0,0 +1,58 @@
+#pragma once
+
+
+#ifndef PARAFORMER_MODELIMP_H
+#define PARAFORMER_MODELIMP_H
+
+#include "precomp.h"
+
+namespace paraformer {
+
+    class Paraformer : public Model {
+    /**
+     * Author: Speech Lab of DAMO Academy, Alibaba Group
+     * Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
+     * https://arxiv.org/pdf/2206.08317.pdf
+    */
+    private:
+        // Options for the fbank extractor that FbankKaldi builds on each call.
+        knf::FbankOptions fbank_opts;
+
+        std::unique_ptr<FsmnVad> vad_handle;
+        std::unique_ptr<CTTransformer> punc_handle;
+
+        Vocab* vocab = nullptr;  // owned; allocated in the constructor, deleted in the destructor
+        vector<float> means_list;
+        vector<float> vars_list;
+        const float scale = 22.6274169979695;
+        int32_t lfr_window_size = 7;
+        int32_t lfr_window_shift = 6;
+
+        void LoadCmvn(const char *filename);
+        vector<float> ApplyLfr(const vector<float> &in);
+        void ApplyCmvn(vector<float> *v);
+
+        string GreedySearch( float* in, int n_len, int64_t token_nums);
+        // Members are destroyed in reverse declaration order: keep env_ ahead of m_session so it outlives the session.
+        Ort::Env env_;
+        Ort::SessionOptions session_options;
+        std::shared_ptr<Ort::Session> m_session;
+
+        vector<string> m_strInputNames, m_strOutputNames;
+        vector<const char*> m_szInputNames;
+        vector<const char*> m_szOutputNames;
+
+    public:
+        Paraformer(const char* path, int thread_num=0, bool quantize=false, bool use_vad=false, bool use_punc=false);
+        ~Paraformer();
+        void Reset();
+        vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
+        string ForwardChunk(float* din, int len, int flag);
+        string Forward(float* din, int len, int flag);
+        string Rescoring();
+        std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data);
+        string AddPunc(const char* sz_input);
+    };
+
+} // namespace paraformer
+#endif
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
deleted file mode 100644
index 695e0f7..0000000
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-#include "precomp.h"
-
-using namespace std;
-using namespace paraformer;
-
-ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
-:env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),sessionOptions{}{
- string model_path;
- string cmvn_path;
- string config_path;
-
- if(quantize)
- {
- model_path = pathAppend(path, "model_quant.onnx");
- }else{
- model_path = pathAppend(path, "model.onnx");
- }
- cmvn_path = pathAppend(path, "am.mvn");
- config_path = pathAppend(path, "config.yaml");
-
- fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
- fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
- memset(fft_input, 0, sizeof(float) * fft_size);
- plan = fftwf_plan_dft_r2c_1d(fft_size, fft_input, fft_out, FFTW_ESTIMATE);
-
- //sessionOptions.SetInterOpNumThreads(1);
- sessionOptions.SetIntraOpNumThreads(nNumThread);
- sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
-
-#ifdef _WIN32
- wstring wstrPath = strToWstr(model_path);
- m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions);
-#else
- m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions);
-#endif
-
- string strName;
- getInputName(m_session.get(), strName);
- m_strInputNames.push_back(strName.c_str());
- getInputName(m_session.get(), strName,1);
- m_strInputNames.push_back(strName);
-
- getOutputName(m_session.get(), strName);
- m_strOutputNames.push_back(strName);
- getOutputName(m_session.get(), strName,1);
- m_strOutputNames.push_back(strName);
-
- for (auto& item : m_strInputNames)
- m_szInputNames.push_back(item.c_str());
- for (auto& item : m_strOutputNames)
- m_szOutputNames.push_back(item.c_str());
- vocab = new Vocab(config_path.c_str());
- load_cmvn(cmvn_path.c_str());
-}
-
-ModelImp::~ModelImp()
-{
- if(vocab)
- delete vocab;
- fftwf_free(fft_input);
- fftwf_free(fft_out);
- fftwf_destroy_plan(plan);
- fftwf_cleanup();
-}
-
-void ModelImp::reset()
-{
-}
-
-void ModelImp::apply_lfr(Tensor<float>*& din)
-{
- int mm = din->size[2];
- int ll = ceil(mm / 6.0);
- Tensor<float>* tmp = new Tensor<float>(ll, 560);
- int out_offset = 0;
- for (int i = 0; i < ll; i++) {
- for (int j = 0; j < 7; j++) {
- int idx = i * 6 + j - 3;
- if (idx < 0) {
- idx = 0;
- }
- if (idx >= mm) {
- idx = mm - 1;
- }
- memcpy(tmp->buff + out_offset, din->buff + idx * 80,
- sizeof(float) * 80);
- out_offset += 80;
- }
- }
- delete din;
- din = tmp;
-}
-
-void ModelImp::load_cmvn(const char *filename)
-{
- ifstream cmvn_stream(filename);
- string line;
-
- while (getline(cmvn_stream, line)) {
- istringstream iss(line);
- vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
- if (line_item[0] == "<AddShift>") {
- getline(cmvn_stream, line);
- istringstream means_lines_stream(line);
- vector<string> means_lines{istream_iterator<string>{means_lines_stream}, istream_iterator<string>{}};
- if (means_lines[0] == "<LearnRateCoef>") {
- for (int j = 3; j < means_lines.size() - 1; j++) {
- means_list.push_back(stof(means_lines[j]));
- }
- continue;
- }
- }
- else if (line_item[0] == "<Rescale>") {
- getline(cmvn_stream, line);
- istringstream vars_lines_stream(line);
- vector<string> vars_lines{istream_iterator<string>{vars_lines_stream}, istream_iterator<string>{}};
- if (vars_lines[0] == "<LearnRateCoef>") {
- for (int j = 3; j < vars_lines.size() - 1; j++) {
- vars_list.push_back(stof(vars_lines[j])*scale);
- }
- continue;
- }
- }
- }
-}
-
-void ModelImp::apply_cmvn(Tensor<float>* din)
-{
- const float* var;
- const float* mean;
- var = vars_list.data();
- mean= means_list.data();
-
- int m = din->size[2];
- int n = din->size[3];
-
- for (int i = 0; i < m; i++) {
- for (int j = 0; j < n; j++) {
- int idx = i * n + j;
- din->buff[idx] = (din->buff[idx] + mean[j]) * var[j];
- }
- }
-}
-
-string ModelImp::greedy_search(float * in, int nLen )
-{
- vector<int> hyps;
- int Tmax = nLen;
- for (int i = 0; i < Tmax; i++) {
- int max_idx;
- float max_val;
- findmax(in + i * 8404, 8404, max_val, max_idx);
- hyps.push_back(max_idx);
- }
-
- return vocab->vector2stringV2(hyps);
-}
-
-string ModelImp::forward(float* din, int len, int flag)
-{
- Tensor<float>* in;
- FeatureExtract* fe = new FeatureExtract(3);
- fe->reset();
- fe->insert(plan, din, len, flag);
- fe->fetch(in);
- apply_lfr(in);
- apply_cmvn(in);
- Ort::RunOptions run_option;
-
-#ifdef _WIN_X86
- Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
-#else
- Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-#endif
-
- std::array<int64_t, 3> input_shape_{ in->size[0],in->size[2],in->size[3] };
- Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
- in->buff,
- in->buff_size,
- input_shape_.data(),
- input_shape_.size());
-
- std::vector<int32_t> feats_len{ in->size[2] };
- std::vector<int64_t> feats_len_dim{ 1 };
- Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
- m_memoryInfo,
- feats_len.data(),
- feats_len.size() * sizeof(int32_t),
- feats_len_dim.data(),
- feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
- std::vector<Ort::Value> input_onnx;
- input_onnx.emplace_back(std::move(onnx_feats));
- input_onnx.emplace_back(std::move(onnx_feats_len));
-
- string result;
- try {
-
- auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
- std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
-
- int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
- float* floatData = outputTensor[0].GetTensorMutableData<float>();
- auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
- result = greedy_search(floatData, *encoder_out_lens);
- }
- catch (...)
- {
- result = "";
- }
-
- if(in){
- delete in;
- in = nullptr;
- }
- if(fe){
- delete fe;
- fe = nullptr;
- }
-
- return result;
-}
-
-string ModelImp::forward_chunk(float* din, int len, int flag)
-{
-
- printf("Not Imp!!!!!!\n");
- return "Hello";
-}
-
-string ModelImp::rescoring()
-{
- printf("Not Imp!!!!!!\n");
- return "Hello";
-}
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.h b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
deleted file mode 100644
index 8946ae1..0000000
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-
-#ifndef PARAFORMER_MODELIMP_H
-#define PARAFORMER_MODELIMP_H
-
-namespace paraformer {
-
- class ModelImp : public Model {
- private:
- int fft_size=512;
- float *fft_input;
- fftwf_complex *fft_out;
- fftwf_plan plan;
-
- Vocab* vocab;
- vector<float> means_list;
- vector<float> vars_list;
- const float scale = 22.6274169979695;
-
- void apply_lfr(Tensor<float>*& din);
- void apply_cmvn(Tensor<float>* din);
- void load_cmvn(const char *filename);
-
- string greedy_search( float* in, int nLen);
-
- std::unique_ptr<Ort::Session> m_session;
- Ort::Env env_;
- Ort::SessionOptions sessionOptions;
-
- vector<string> m_strInputNames, m_strOutputNames;
- vector<const char*> m_szInputNames;
- vector<const char*> m_szOutputNames;
-
- public:
- ModelImp(const char* path, int nNumThread=0, bool quantize=false);
- ~ModelImp();
- void reset();
- string forward_chunk(float* din, int len, int flag);
- string forward(float* din, int len, int flag);
- string rescoring();
-
- };
-
-} // namespace paraformer
-#endif
diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h
index 3aeed14..d567f15 100644
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -1,6 +1,5 @@
#pragma once
// system
-
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
@@ -16,38 +15,31 @@
#include <string>
#include <math.h>
#include <numeric>
-
-
#include <cstring>
using namespace std;
// third part
-
-#include <fftw3.h>
#include "onnxruntime_run_options_config_keys.h"
#include "onnxruntime_cxx_api.h"
-
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
// mine
-
+#include "common-struct.h"
+#include "com-define.h"
#include "commonfunc.h"
-#include <ComDefine.h>
-#include "predefine_coe.h"
-
-#include <ComDefine.h>
-//#include "alignedmem.h"
-#include "Vocab.h"
-#include "Tensor.h"
+#include "predefine-coe.h"
+#include "tokenizer.h"
+#include "ct-transformer.h"
+#include "fsmn-vad.h"
+#include "e2e-vad.h"
+#include "vocab.h"
+#include "audio.h"
+#include "tensor.h"
#include "util.h"
-#include "CommonStruct.h"
-#include "FeatureExtract.h"
-#include "FeatureQueue.h"
-#include "SpeechWrap.h"
-#include <Audio.h>
#include "resample.h"
-#include "Model.h"
-#include "paraformer_onnx.h"
+#include "model.h"
+#include "paraformer.h"
#include "libfunasrapi.h"
-
using namespace paraformer;
diff --git a/funasr/runtime/onnxruntime/src/predefine_coe.h b/funasr/runtime/onnxruntime/src/predefine-coe.h
similarity index 100%
rename from funasr/runtime/onnxruntime/src/predefine_coe.h
rename to funasr/runtime/onnxruntime/src/predefine-coe.h
diff --git a/funasr/runtime/onnxruntime/src/Tensor.h b/funasr/runtime/onnxruntime/src/tensor.h
similarity index 98%
rename from funasr/runtime/onnxruntime/src/Tensor.h
rename to funasr/runtime/onnxruntime/src/tensor.h
index 68ac9aa..3b7a633 100644
--- a/funasr/runtime/onnxruntime/src/Tensor.h
+++ b/funasr/runtime/onnxruntime/src/tensor.h
@@ -71,7 +71,7 @@
{
buff_size = size[0] * size[1] * size[2] * size[3];
mem_size = buff_size;
- buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
+ buff = (T *)AlignedMalloc(32, buff_size * sizeof(T));
}
template <typename T> void Tensor<T>::free_buff()
diff --git a/funasr/runtime/onnxruntime/src/tmp.h b/funasr/runtime/onnxruntime/src/tmp.h
deleted file mode 100644
index b57303f..0000000
--- a/funasr/runtime/onnxruntime/src/tmp.h
+++ /dev/null
@@ -1,112 +0,0 @@
-
-#ifndef WENETPARAMS_H
-#define WENETPARAMS_H
-// #pragma pack(1)
-
-#define vocab_size 5538
-
-typedef struct {
- float conv0_weight[512 * 9];
- float conv0_bias[512];
-
- float conv1_weight[512 * 512 * 9];
- float conv1_bias[512];
-
- float out0_weight[9728 * 512];
- float out0_bias[512];
-
-} EncEmbedParams;
-
-typedef struct {
- float linear_q_weight[512 * 512];
- float linear_q_bias[512];
- float linear_k_weight[512 * 512];
- float linear_k_bias[512];
- float linear_v_weight[512 * 512];
- float linear_v_bias[512];
- float linear_out_weight[512 * 512];
- float linear_out_bias[512];
-} SelfAttnParams;
-
-typedef struct {
- SelfAttnParams linear0;
- float linear_pos_weight[512 * 512];
- float pos_bias_u[512];
- float pos_bias_v[512];
-
-} EncSelfAttnParams;
-
-typedef struct {
- float w1_weight[512 * 2048];
- float w1_bias[2048];
- float w2_weight[2048 * 512];
- float w2_bias[512];
-} FeedForwardParams;
-
-typedef struct {
- float weight[512];
- float bias[512];
-} NormParams;
-
-typedef struct {
- float pointwise_conv1_weight[1024 * 512];
- float pointwise_conv1_bias[1024];
-
- float depthwise_conv_weight[512 * 15];
- float depthwise_conv_bias[512];
-
- float pointwise_conv2_weight[512 * 512];
- float pointwise_conv2_bias[512];
- NormParams norm;
-} EncConvParams;
-
-typedef struct {
- EncSelfAttnParams self_attn;
- FeedForwardParams feedforward;
- FeedForwardParams feedforward_macaron;
- EncConvParams conv_module;
- NormParams norm_ff;
- NormParams norm_mha;
- NormParams norm_macaron;
- NormParams norm_conv;
- NormParams norm_final;
- // float concat_weight[1024 * 512];
- // float concat_bias[512];
-} SubEncoderParams;
-
-typedef struct {
- EncEmbedParams embed;
- SubEncoderParams sub_encoder[12];
- NormParams after_norm;
-} EncoderParams;
-
-typedef struct {
- SelfAttnParams self_attn;
- SelfAttnParams src_attn;
- FeedForwardParams feedward;
- NormParams norm1;
- NormParams norm2;
- NormParams norm3;
- // float concat_weight1[1024 * 512];
- // float concat_bias1[512];
- // float concat_weight2[1024 * 512];
- // float concat_bias2[512];
-} SubDecoderParams;
-
-typedef struct {
- float embed_weight[vocab_size * 512];
- SubDecoderParams sub_decoder[6];
- NormParams after_norm;
- float output_weight[vocab_size * 512];
- float output_bias[vocab_size];
-} DecoderParams;
-
-typedef struct {
- EncoderParams encoder;
- float ctc_weight[512 * vocab_size];
- float ctc_bias[vocab_size];
- DecoderParams decoder;
-} WenetParams;
-
-// #pragma pack()
-#endif
diff --git a/funasr/runtime/onnxruntime/src/tokenizer.cpp b/funasr/runtime/onnxruntime/src/tokenizer.cpp
new file mode 100644
index 0000000..ef0c533
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/tokenizer.cpp
@@ -0,0 +1,208 @@
+ #include "precomp.h"
+
+CTokenizer::CTokenizer(const char* sz_yamlfile):m_ready(false)
+{
+ OpenYaml(sz_yamlfile); // return value is ignored; OpenYaml sets m_ready itself on success
+}
+
+CTokenizer::CTokenizer():m_ready(false)
+{ // loads nothing: m_ready stays false until OpenYaml succeeds
+}
+
+void CTokenizer::ReadYaml(const YAML::Node& node)
+{
+ if (node.IsMap())
+ {// map node: recurse into every value
+ for (auto it = node.begin(); it != node.end(); ++it)
+ {
+ ReadYaml(it->second);
+ }
+ }
+ if (node.IsSequence()) {// sequence node: recurse into every element
+ for (size_t i = 0; i < node.size(); ++i) {
+ ReadYaml(node[i]);
+ }
+ }
+ if (node.IsScalar()) {// scalar node: print its string value to stdout
+ cout << node.as<string>() << endl;
+ }
+}
+
+bool CTokenizer::OpenYaml(const char* sz_yamlfile)
+{
+    // Fills the token and punctuation tables from "token_list" / "punc_list"; returns false if the file cannot be read.
+    try
+    {
+        // Load inside the try block: an unreadable or malformed file is reported by exception.
+        YAML::Node m_Config = YAML::LoadFile(sz_yamlfile);
+        if (m_Config.IsNull()) return false;
+        auto Tokens = m_Config["token_list"];
+        if (Tokens.IsSequence())
+        {
+            for (size_t i = 0; i < Tokens.size(); ++i)
+            {
+                if (Tokens[i].IsScalar())
+                {
+                    m_id2token.push_back(Tokens[i].as<string>());
+                    m_token2id.insert(make_pair(Tokens[i].as<string>(), static_cast<int>(i)));
+                }
+            }
+        }
+        auto Puncs = m_Config["punc_list"];
+        if (Puncs.IsSequence())
+        {
+            for (size_t i = 0; i < Puncs.size(); ++i)
+            {
+                if (Puncs[i].IsScalar())
+                {
+                    m_id2punc.push_back(Puncs[i].as<string>());
+                    m_punc2id.insert(make_pair(Puncs[i].as<string>(), static_cast<int>(i)));
+                }
+            }
+        }
+    }
+    catch (const YAML::Exception&) {  // base class of BadFile; also covers parse and conversion errors
+        std::cout << "read error!" << std::endl;
+        return false;
+    }
+    m_ready = true;
+    return m_ready;
+}
+
+vector<string> CTokenizer::Id2String(vector<int> input)
+{
+    // Map every id to its token text; ids index m_id2token directly, without a bounds check.
+    vector<string> tokens;
+    for (size_t k = 0; k < input.size(); ++k)
+        tokens.push_back(m_id2token[input[k]]);
+
+    return tokens;
+}
+
+int CTokenizer::String2Id(string input)
+{
+    // A known token yields its id; anything else yields the id stored under UNK_CHAR.
+    auto hit = m_token2id.find(input);
+    if (hit == m_token2id.end())
+        return m_token2id[UNK_CHAR];
+
+    return hit->second;
+}
+
+vector<int> CTokenizer::String2Ids(vector<string> input)
+{
+    // Lower-case each token, then map it to its id (unknown tokens get the id stored under UNK_CHAR).
+    vector<int> result;
+    for (auto& item : input) {
+        // tolower requires an unsigned char value; UTF-8 bytes >= 0x80 can be negative as plain char.
+        transform(item.begin(), item.end(), item.begin(),
+                  [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+        auto found = m_token2id.find(item);
+        result.push_back(found != m_token2id.end() ? found->second : m_token2id[UNK_CHAR]);
+    }
+    return result;
+}
+
+vector<string> CTokenizer::Id2Punc(vector<int> input)
+{
+    // Map every id to its punctuation text; ids index m_id2punc directly, without a bounds check.
+    vector<string> marks;
+    for (size_t k = 0; k < input.size(); ++k)
+        marks.push_back(m_id2punc[input[k]]);
+
+    return marks;
+}
+
+string CTokenizer::Id2Punc(int n_punc_id)
+{
+ return m_id2punc[n_punc_id]; // direct index into m_id2punc, no bounds check
+}
+
+vector<int> CTokenizer::Punc2Ids(vector<string> input)
+{
+    // Look each mark up with map operator[]; a mark not yet in m_punc2id is inserted with value 0.
+    vector<int> ids;
+    for (size_t k = 0; k < input.size(); ++k)
+        ids.push_back(m_punc2id[input[k]]);
+
+    return ids;
+}
+
+vector<string> CTokenizer::SplitChineseString(const string & str_info)
+{
+    vector<string> pieces;
+    const int total = str_info.size();
+    for (int offset = 0; offset < total; ) {
+        // Count the leading 1 bits of the first byte (at most 6); a byte with none still takes one byte.
+        int ones = 0;
+        while (ones < 6 && (str_info[offset] & (0x80 >> ones)))
+            ++ones;
+        const int width = ones > 0 ? ones : 1;
+        pieces.push_back(str_info.substr(offset, width));
+        offset += width;
+    }
+
+    return pieces;
+}
+
+void CTokenizer::StrSplit(const string& str, const char split, vector<string>& res)
+{
+    // Appends to res every field of str delimited by split, empty fields included; an empty str appends nothing.
+    if (str.empty())
+    {
+        return;
+    }
+    // Scan in place instead of re-copying the remaining text after every field.
+    size_t start = 0;
+    for (size_t pos = str.find(split); pos != string::npos; pos = str.find(split, start))
+    {
+        res.emplace_back(str, start, pos - start);
+        start = pos + 1;
+    }
+    res.emplace_back(str, start, string::npos);  // field after the last delimiter (may be empty)
+}
+
+ void CTokenizer::Tokenize(const char* str_info, vector<string> & str_out, vector<int> & id_out)
+{
+ vector<string> strList;
+ StrSplit(str_info,' ', strList); // split the input on single spaces first
+ string current_eng,current_chinese;
+ for (auto& item : strList)
+ {
+ current_eng = "";
+ current_chinese = "";
+ for (auto& ch : item)
+ {
+ if (!(ch& 0x80))
+ { // high bit clear: single-byte (ASCII) character
+ if (current_chinese.size() > 0)
+ {
+ // flush the pending multi-byte run, one string per UTF-8 character
+ auto chineseList = SplitChineseString(current_chinese);
+ str_out.insert(str_out.end(), chineseList.begin(),chineseList.end());
+ current_chinese = "";
+ }
+ current_eng += ch;
+ }
+ else
+ { // high bit set: byte of a multi-byte sequence
+ if (current_eng.size() > 0)
+ {
+ str_out.push_back(current_eng); // a run of ASCII bytes is emitted as one token
+ current_eng = "";
+ }
+ current_chinese += ch;
+ }
+ }
+ if (current_chinese.size() > 0)
+ {
+ auto chineseList = SplitChineseString(current_chinese);
+ str_out.insert(str_out.end(), chineseList.begin(), chineseList.end());
+ current_chinese = "";
+ }
+ if (current_eng.size() > 0)
+ {
+ str_out.push_back(current_eng);
+ }
+ }
+ id_out= String2Ids(str_out); // ids are computed for everything in str_out, including entries present on entry
+}
diff --git a/funasr/runtime/onnxruntime/src/tokenizer.h b/funasr/runtime/onnxruntime/src/tokenizer.h
new file mode 100644
index 0000000..319975a
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/tokenizer.h
@@ -0,0 +1,27 @@
+#pragma once
+#include "yaml-cpp/yaml.h"
+
+class CTokenizer {
+private:
+ // m_ready is set by OpenYaml on success; the four tables below are filled from "token_list" / "punc_list".
+ bool m_ready = false;
+ vector<string> m_id2token,m_id2punc;
+ map<string, int> m_token2id,m_punc2id;
+
+public:
+ // The one-argument constructor calls OpenYaml; the default constructor loads nothing.
+ CTokenizer(const char* sz_yamlfile);
+ CTokenizer();
+ bool OpenYaml(const char* sz_yamlfile);
+ void ReadYaml(const YAML::Node& node); // recursively prints every scalar of the node to stdout
+ vector<string> Id2String(vector<int> input);
+ vector<int> String2Ids(vector<string> input); // lower-cases each token before the lookup
+ int String2Id(string input);
+ vector<string> Id2Punc(vector<int> input);
+ string Id2Punc(int n_punc_id);
+ vector<int> Punc2Ids(vector<string> input);
+ vector<string> SplitChineseString(const string& str_info); // one string per UTF-8 character
+ void StrSplit(const string& str, const char split, vector<string>& res);
+ void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out);
+
+};
diff --git a/funasr/runtime/onnxruntime/src/util.cpp b/funasr/runtime/onnxruntime/src/util.cpp
index 5a72c72..c5c27af 100644
--- a/funasr/runtime/onnxruntime/src/util.cpp
+++ b/funasr/runtime/onnxruntime/src/util.cpp
@@ -1,7 +1,7 @@
#include "precomp.h"
-float *loadparams(const char *filename)
+float *LoadParams(const char *filename)
{
FILE *fp;
@@ -10,20 +10,20 @@
uint32_t nFileLen = ftell(fp);
fseek(fp, 0, SEEK_SET);
- float *params_addr = (float *)aligned_malloc(32, nFileLen);
+ float *params_addr = (float *)AlignedMalloc(32, nFileLen);
int n = fread(params_addr, 1, nFileLen, fp);
fclose(fp);
return params_addr;
}
-int val_align(int val, int align)
+int ValAlign(int val, int align)
{
float tmp = ceil((float)val / (float)align) * (float)align;
return (int)tmp;
}
-void disp_params(float *din, int size)
+void DispParams(float *din, int size)
{
int i;
for (i = 0; i < size; i++) {
@@ -39,7 +39,7 @@
fclose(fp);
}
-void basic_norm(Tensor<float> *&din, float norm)
+void BasicNorm(Tensor<float> *&din, float norm)
{
int Tmax = din->size[2];
@@ -59,7 +59,7 @@
}
}
-void findmax(float *din, int len, float &max_val, int &max_idx)
+void FindMax(float *din, int len, float &max_val, int &max_idx)
{
int i;
max_val = -INFINITY;
@@ -72,7 +72,7 @@
}
}
-string pathAppend(const string &p1, const string &p2)
+string PathAppend(const string &p1, const string &p2)
{
char sep = '/';
@@ -89,7 +89,7 @@
return (p1 + p2);
}
-void relu(Tensor<float> *din)
+void Relu(Tensor<float> *din)
{
int i;
for (i = 0; i < din->buff_size; i++) {
@@ -98,7 +98,7 @@
}
}
-void swish(Tensor<float> *din)
+void Swish(Tensor<float> *din)
{
int i;
for (i = 0; i < din->buff_size; i++) {
@@ -107,7 +107,7 @@
}
}
-void sigmoid(Tensor<float> *din)
+void Sigmoid(Tensor<float> *din)
{
int i;
for (i = 0; i < din->buff_size; i++) {
@@ -116,7 +116,7 @@
}
}
-void doubleswish(Tensor<float> *din)
+void DoubleSwish(Tensor<float> *din)
{
int i;
for (i = 0; i < din->buff_size; i++) {
@@ -125,7 +125,7 @@
}
}
-void softmax(float *din, int mask, int len)
+void Softmax(float *din, int mask, int len)
{
float *tmp = (float *)malloc(mask * sizeof(float));
int i;
@@ -149,7 +149,7 @@
}
}
-void log_softmax(float *din, int len)
+void LogSoftmax(float *din, int len)
{
float *tmp = (float *)malloc(len * sizeof(float));
int i;
@@ -164,7 +164,7 @@
free(tmp);
}
-void glu(Tensor<float> *din, Tensor<float> *dout)
+void Glu(Tensor<float> *din, Tensor<float> *dout)
{
int mm = din->buff_size / 1024;
int i, j;
diff --git a/funasr/runtime/onnxruntime/src/util.h b/funasr/runtime/onnxruntime/src/util.h
index 48a27db..6327f7b 100644
--- a/funasr/runtime/onnxruntime/src/util.h
+++ b/funasr/runtime/onnxruntime/src/util.h
@@ -5,26 +5,26 @@
using namespace std;
-extern float *loadparams(const char *filename);
+extern float *LoadParams(const char *filename);
extern void SaveDataFile(const char *filename, void *data, uint32_t len);
-extern void relu(Tensor<float> *din);
-extern void swish(Tensor<float> *din);
-extern void sigmoid(Tensor<float> *din);
-extern void doubleswish(Tensor<float> *din);
+extern void Relu(Tensor<float> *din);
+extern void Swish(Tensor<float> *din);
+extern void Sigmoid(Tensor<float> *din);
+extern void DoubleSwish(Tensor<float> *din);
-extern void softmax(float *din, int mask, int len);
+extern void Softmax(float *din, int mask, int len);
-extern void log_softmax(float *din, int len);
-extern int val_align(int val, int align);
-extern void disp_params(float *din, int size);
+extern void LogSoftmax(float *din, int len);
+extern int ValAlign(int val, int align);
+extern void DispParams(float *din, int size);
-extern void basic_norm(Tensor<float> *&din, float norm);
+extern void BasicNorm(Tensor<float> *&din, float norm);
-extern void findmax(float *din, int len, float &max_val, int &max_idx);
+extern void FindMax(float *din, int len, float &max_val, int &max_idx);
-extern void glu(Tensor<float> *din, Tensor<float> *dout);
+extern void Glu(Tensor<float> *din, Tensor<float> *dout);
-string pathAppend(const string &p1, const string &p2);
+string PathAppend(const string &p1, const string &p2);
#endif
diff --git a/funasr/runtime/onnxruntime/src/Vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
similarity index 88%
rename from funasr/runtime/onnxruntime/src/Vocab.cpp
rename to funasr/runtime/onnxruntime/src/vocab.cpp
index b54a6c6..ba041b8 100644
--- a/funasr/runtime/onnxruntime/src/Vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -1,4 +1,4 @@
-#include "Vocab.h"
+#include "vocab.h"
#include "yaml-cpp/yaml.h"
#include <fstream>
@@ -12,13 +12,13 @@
Vocab::Vocab(const char *filename)
{
ifstream in(filename);
- loadVocabFromYaml(filename);
+ LoadVocabFromYaml(filename);
}
Vocab::~Vocab()
{
}
-void Vocab::loadVocabFromYaml(const char* filename){
+void Vocab::LoadVocabFromYaml(const char* filename){
YAML::Node config;
try{
config = YAML::LoadFile(filename);
@@ -26,72 +26,62 @@
printf("error loading file, yaml file error or not exist.\n");
exit(-1);
}
-
YAML::Node myList = config["token_list"];
for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
vocab.push_back(it->as<string>());
}
}
-string Vocab::vector2string(vector<int> in)
+string Vocab::Vector2String(vector<int> in)
{
int i;
stringstream ss;
for (auto it = in.begin(); it != in.end(); it++) {
ss << vocab[*it];
}
-
return ss.str();
}
-int str2int(string str)
+int Str2Int(string str)
{
const char *ch_array = str.c_str();
if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) ||
((ch_array[2] & 0xc0) != 0x80))
return 0;
-
int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) |
(ch_array[2] & 0x3f);
return val;
}
-bool Vocab::isChinese(string ch)
+bool Vocab::IsChinese(string ch)
{
if (ch.size() != 3) {
return false;
}
-
- int unicode = str2int(ch);
+ int unicode = Str2Int(ch);
if (unicode >= 19968 && unicode <= 40959) {
return true;
}
-
return false;
}
-string Vocab::vector2stringV2(vector<int> in)
+string Vocab::Vector2StringV2(vector<int> in)
{
int i;
list<string> words;
-
int is_pre_english = false;
int pre_english_len = 0;
-
int is_combining = false;
string combine = "";
for (auto it = in.begin(); it != in.end(); it++) {
string word = vocab[*it];
-
// step1 space character skips
if (word == "<s>" || word == "</s>" || word == "<unk>")
continue;
-
// step2 combie phoneme to full word
{
int sub_word = !(word.find("@@") == string::npos);
-
// process word start and middle part
if (sub_word) {
combine += word.erase(word.length() - 2);
@@ -109,15 +99,13 @@
// step3 process english word deal with space , turn abbreviation to upper case
{
-
// input word is chinese, not need process
- if (isChinese(word)) {
+ if (IsChinese(word)) {
words.push_back(word);
is_pre_english = false;
}
// input word is english word
else {
-
// pre word is chinese
if (!is_pre_english) {
word[0] = word[0] - 32;
@@ -125,10 +113,8 @@
pre_english_len = word.size();
}
-
// pre word is english word
else {
-
// single letter turn to upper case
if (word.size() == 1) {
word[0] = word[0] - 32;
@@ -147,16 +133,10 @@
pre_english_len = word.size();
}
}
-
is_pre_english = true;
-
}
}
}
-
- // for (auto it = words.begin(); it != words.end(); it++) {
- // cout << *it << endl;
- // }
stringstream ss;
for (auto it = words.begin(); it != words.end(); it++) {
@@ -166,7 +146,7 @@
return ss.str();
}
-int Vocab::size()
+int Vocab::Size()
{
return vocab.size();
}
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
new file mode 100644
index 0000000..a3fdf65
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -0,0 +1,25 @@
+// Token vocabulary read from the "token_list" entry of a YAML file.
+#ifndef VOCAB_H
+#define VOCAB_H
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+using namespace std;
+
+class Vocab {
+ private:
+ vector<string> vocab;  // tokens in file order; a token id is an index into this vector
+ bool IsChinese(string ch);  // true if ch is 3 bytes long and decodes to a code point in [19968, 40959]
+ bool IsEnglish(string ch);  // NOTE(review): no definition is visible in this patch - confirm it exists or drop it
+ void LoadVocabFromYaml(const char* filename);  // appends every entry of "token_list" in the YAML file to vocab
+
+ public:
+ Vocab(const char *filename);  // loads the vocabulary through LoadVocabFromYaml(filename)
+ ~Vocab();
+ int Size();  // number of tokens held in vocab
+ string Vector2String(vector<int> in);  // concatenates vocab[id] for every id in `in`, without separators
+ string Vector2StringV2(vector<int> in);  // like Vector2String, but skips <s>, </s>, <unk>, merges "@@" sub-word pieces and applies English spacing/upper-casing
+};
+
+#endif
diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
deleted file mode 100644
index e3224e3..0000000
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-if(WIN32)
- if(CMAKE_CL_64)
- link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
- else()
- link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
- endif()
-endif()
-
-set(EXTRA_LIBS funasr)
-
-
-include_directories(${CMAKE_SOURCE_DIR}/include)
-set(EXECNAME "tester")
-set(EXECNAMERTF "tester_rtf")
-
-add_executable(${EXECNAME} "tester.cpp")
-target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
-
-add_executable(${EXECNAMERTF} "tester_rtf.cpp")
-target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS})
-
diff --git a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
deleted file mode 100644
index dd79887..0000000
--- a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-
-#ifndef _WIN32
-#include <sys/time.h>
-#else
-#include <win_func.h>
-#endif
-
-#include "libfunasrapi.h"
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-int main(int argc, char *argv[])
-{
-
- if (argc < 4)
- {
- printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) \n", argv[0]);
- exit(-1);
- }
-
- // read wav.scp
- vector<string> wav_list;
- ifstream in(argv[2]);
- if (!in.is_open()) {
- printf("Failed to open file: %s", argv[2]);
- return 0;
- }
- string line;
- while(getline(in, line))
- {
- istringstream iss(line);
- string column1, column2;
- iss >> column1 >> column2;
- wav_list.push_back(column2);
- }
- in.close();
-
- // model init
- struct timeval start, end;
- gettimeofday(&start, NULL);
- int nThreadNum = 1;
- // is quantize
- bool quantize = false;
- istringstream(argv[3]) >> boolalpha >> quantize;
-
- FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
- if (!AsrHanlde)
- {
- printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
- exit(-1);
- }
- gettimeofday(&end, NULL);
- long seconds = (end.tv_sec - start.tv_sec);
- long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
- printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
-
- // warm up
- for (size_t i = 0; i < 30; i++)
- {
- FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
- }
-
- // forward
- float snippet_time = 0.0f;
- float total_length = 0.0f;
- long total_time = 0.0f;
-
- for (size_t i = 0; i < wav_list.size(); i++)
- {
- gettimeofday(&start, NULL);
- FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
- gettimeofday(&end, NULL);
- seconds = (end.tv_sec - start.tv_sec);
- long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
- total_time += taking_micros;
-
- if(Result){
- string msg = FunASRGetResult(Result, 0);
- printf("Result: %s \n", msg.c_str());
-
- snippet_time = FunASRGetRetSnippetTime(Result);
- total_length += snippet_time;
- FunASRFreeResult(Result);
- }else{
- cout <<"No return data!";
- }
- }
-
- printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
- printf("total_time_comput %ld ms.\n", total_time / 1000);
- printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
-
- FunASRUninit(AsrHanlde);
- return 0;
-}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format
new file mode 100644
index 0000000..c65e772
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format
@@ -0,0 +1,9 @@
+---
+BasedOnStyle: Google
+---
+Language: Cpp
+Cpp11BracedListStyle: true
+Standard: Cpp11
+DerivePointerAlignment: false
+PointerAlignment: Right
+---
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml
new file mode 100644
index 0000000..834e7d7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml
@@ -0,0 +1,73 @@
+# CI for kaldi-native-fbank on Linux and macOS: configure, build and test the
+# C++ library, then build and import the Python package.
+name: linux-macos
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+# Cancel a still-running workflow for the same ref when a new one starts.
+concurrency:
+  group: linux-macos-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  linux_macos:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Configure Cmake
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -D CMAKE_BUILD_TYPE=Release ..
+
+      # Build only; the tests run once, in the next step.
+      - name: Build kaldi-native-fbank for ubuntu/macos
+        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
+        run: |
+          cd build
+          make -j2
+
+      - name: Run tests for ubuntu/macos
+        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
+        run: |
+          cd build
+          ctest --output-on-failure
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install wheel twine setuptools
+
+      - name: Build Python
+        shell: bash
+        run: |
+          python3 setup.py install
+
+      - name: Display Python package version
+        shell: bash
+        run: |
+          python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml
new file mode 100644
index 0000000..0939147
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml
@@ -0,0 +1,100 @@
+# CI for kaldi-native-fbank on 64-bit Windows across several MSVC toolsets:
+# build and test the C++ library, then build and test the Python package.
+name: windows-x64
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: windows-x64-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  windows_x64:
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.vs-version }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - vs-version: vs2015
+            toolset-version: v140
+            os: windows-2019
+
+          - vs-version: vs2017
+            toolset-version: v141
+            os: windows-2019
+
+          - vs-version: vs2019
+            toolset-version: v142
+            os: windows-2022
+
+          - vs-version: vs2022
+            toolset-version: v143
+            os: windows-2022
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Configure CMake
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -T ${{ matrix.toolset-version}},host=x64 -D CMAKE_BUILD_TYPE=Release ..
+
+
+      - name: Build kaldi-native-fbank for windows
+        shell: bash
+        run: |
+          cd build
+          cmake --build . --target ALL_BUILD --config Release
+          cat CMakeCache.txt
+
+      - name: Run tests for windows
+        shell: bash
+        run: |
+          cd build
+          ctest --verbose -C Release --output-on-failure -E py
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      # No KALDI_NATIVE_FBANK_CMAKE_ARGS export here: these pip installs do not
+      # use it, and an export does not carry over into later steps anyway.
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip pytest
+          python3 -m pip install wheel twine setuptools
+
+      - name: Build Python
+        shell: bash
+        run: |
+          export KALDI_NATIVE_FBANK_CMAKE_ARGS="-T ${{ matrix.toolset-version}},host=x64 -A x64"
+          python3 setup.py install
+
+      - name: Display Python package version
+        shell: bash
+        run: |
+          python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
+
+      - name: Run Python tests
+        shell: bash
+        run: |
+          cd kaldi-native-fbank/python/tests
+          python3 ./test_fbank_options.py
+          python3 ./test_frame_extraction_options.py
+          python3 ./test_mel_bank_options.py
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml
new file mode 100644
index 0000000..2808702
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml
@@ -0,0 +1,67 @@
+name: windows-x86
+
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+ branches:
+ - master
+
+concurrency:
+ group: windows-x86-${{ github.ref }}
+ cancel-in-progress: true
+
+permissions:
+ contents: read
+
+jobs:
+ windows_x86:
+ runs-on: ${{ matrix.os }}
+ name: ${{ matrix.vs-version }}
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - vs-version: vs2015
+ toolset-version: v140
+ os: windows-2019
+
+ - vs-version: vs2017
+ toolset-version: v141
+ os: windows-2019
+
+ - vs-version: vs2019
+ toolset-version: v142
+ os: windows-2022
+
+ - vs-version: vs2022
+ toolset-version: v143
+ os: windows-2022
+
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: Configure CMake
+ shell: bash
+ run: |
+ mkdir build
+ cd build
+ cmake -T ${{ matrix.toolset-version}},host=x64 -A Win32 -D CMAKE_BUILD_TYPE=Release -DKALDI_NATIVE_FBANK_BUILD_PYTHON=OFF ..
+ cat CMakeCache.txt
+
+
+ - name: Build kaldi-native-fbank for windows
+ shell: bash
+ run: |
+ cd build
+ cmake --build . --target ALL_BUILD --config Release
+
+ - name: Run tests for windows
+ shell: bash
+ run: |
+ cd build
+
+ ctest --verbose -C Release --output-on-failure -E py
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt
new file mode 100644
index 0000000..e931a03
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt
@@ -0,0 +1,138 @@
+# Top-level build script for kaldi-native-fbank.
+#
+# Sets the global build defaults (build type, output directories, RPATH, C++
+# standard), declares the KALDI_NATIVE_FBANK_* options, optionally includes the
+# pybind11 and googletest modules, and finally adds the kaldi-native-fbank/
+# subdirectory.
+
+# cmake_minimum_required() has to come first: it establishes the policy
+# defaults that every later command runs under.
+cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
+
+# CMP0057 enables the IN_LIST operator of if().
+if(POLICY CMP0057)
+  cmake_policy(SET CMP0057 NEW)
+endif()
+
+if("x${CMAKE_SOURCE_DIR}" STREQUAL "x${CMAKE_BINARY_DIR}")
+  message(FATAL_ERROR "\
+In-source build is not a good practice.
+Please use:
+ mkdir build
+ cd build
+ cmake ..
+to build this project"
+  )
+endif()
+
+# Python bindings and tests default to OFF when a toolchain file is given
+# and to ON otherwise; both can be overridden through the options below.
+if(CMAKE_TOOLCHAIN_FILE)
+  set(_BUILD_PYTHON OFF)
+  set(_BUILD_TESTS OFF)
+else()
+  set(_BUILD_PYTHON ON)
+  set(_BUILD_TESTS ON)
+endif()
+
+project(kaldi-native-fbank CXX C)
+
+set(KALDI_NATIVE_FBANK_VERSION "1.13")
+
+# Fall back to a Release build when no build type was requested.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+# BUILD_RPATH_USE_ORIGIN is a target property; the variable that initializes it
+# is CMAKE_BUILD_RPATH_USE_ORIGIN. A plain BUILD_RPATH_USE_ORIGIN variable is
+# not read by CMake.
+set(CMAKE_BUILD_RPATH_USE_ORIGIN TRUE)
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# RPATH entry that refers to the directory of the binary being loaded.
+if(NOT APPLE)
+  set(kaldi_native_fbank_rpath_origin "$ORIGIN")
+else()
+  set(kaldi_native_fbank_rpath_origin "@loader_path")
+endif()
+
+set(CMAKE_INSTALL_RPATH ${kaldi_native_fbank_rpath_origin})
+set(CMAKE_BUILD_RPATH ${kaldi_native_fbank_rpath_origin})
+
+set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
+
+# Build shared libraries unless the user decided otherwise.
+if(NOT DEFINED BUILD_SHARED_LIBS)
+  set(BUILD_SHARED_LIBS ON)
+endif()
+message(STATUS "BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+
+option(KALDI_NATIVE_FBANK_BUILD_TESTS "Whether to build tests or not" ${_BUILD_TESTS})
+option(KALDI_NATIVE_FBANK_BUILD_PYTHON "Whether to build Python extension" ${_BUILD_PYTHON})
+option(KALDI_NATIVE_FBANK_ENABLE_CHECK "Whether to build with log" OFF)
+
+message(STATUS "KALDI_NATIVE_FBANK_BUILD_TESTS: ${KALDI_NATIVE_FBANK_BUILD_TESTS}")
+message(STATUS "KALDI_NATIVE_FBANK_BUILD_PYTHON: ${KALDI_NATIVE_FBANK_BUILD_PYTHON}")
+message(STATUS "KALDI_NATIVE_FBANK_ENABLE_CHECK: ${KALDI_NATIVE_FBANK_ENABLE_CHECK}")
+
+# PROJECT_SOURCE_DIR instead of CMAKE_SOURCE_DIR: the helper modules and the
+# headers must still be found when this project is pulled into another build
+# with add_subdirectory(), where CMAKE_SOURCE_DIR is the parent's root.
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules")
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+
+if(WIN32)
+  add_definitions(-DNOMINMAX) # Otherwise, std::max() and std::min() won't work
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_PYTHON)
+  include(pybind11)
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  enable_testing()
+  include(googletest)
+endif()
+
+# NOTE(review): CMAKE_INSTALL_PREFIX normally has a platform default, so this
+# fallback presumably only triggers when it was explicitly set empty - confirm.
+if(NOT CMAKE_INSTALL_PREFIX)
+  set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install")
+endif()
+
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
+
+include(CheckIncludeFileCXX)
+check_include_file_cxx(cxxabi.h KNF_HAVE_CXXABI_H)
+check_include_file_cxx(execinfo.h KNF_HAVE_EXECINFO_H)
+
+include_directories("${PROJECT_SOURCE_DIR}")
+
+if(WIN32 AND MSVC)
+  # disable various warnings for MSVC
+  # 4244: '=': conversion from 'double' to 'float', possible loss of data
+  # 4267: 'return': conversion from 'size_t' to 'int32_t', possible loss of data
+  # 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
+  set(disabled_warnings
+    /wd4244
+    /wd4267
+    /wd4624
+  )
+  message(STATUS "Disabled warnings: ${disabled_warnings}")
+  foreach(w IN LISTS disabled_warnings)
+    # set() instead of string(APPEND): the latter needs CMake 3.4, while this
+    # file only requires 3.3.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${w} ")
+  endforeach()
+endif()
+
+add_subdirectory(kaldi-native-fbank)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE
new file mode 100644
index 0000000..ee06cfc
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE
@@ -0,0 +1,211 @@
+
+ Legal Notices
+
+ NOTE (this is not from the Apache License): The copyright model is that
+ authors (or their employers, if noted in individual files) own their
+ individual contributions. The authors' contributions can be discerned
+ from the git history.
+
+ -------------------------------------------------------------------------
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in
new file mode 100644
index 0000000..ae2b482
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in
@@ -0,0 +1,5 @@
+include LICENSE
+include README.md
+include CMakeLists.txt
+recursive-include kaldi-native-fbank *.*
+recursive-include cmake *.*
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md
new file mode 100644
index 0000000..2c874c1
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md
@@ -0,0 +1,106 @@
+# Introduction
+
+Kaldi-compatible online fbank feature extractor without external dependencies.
+
+Tested on the following architectures and operating systems:
+
+ - Linux
+ - macOS
+ - Windows
+ - Android
+ - x86
+ - arm
+ - aarch64
+
+# Usage
+
+See the following CMake-based speech recognition (i.e., speech-to-text) projects
+for its usage:
+
+- <https://github.com/k2-fsa/sherpa-ncnn>
+ - Specifically, please have a look at <https://github.com/k2-fsa/sherpa-ncnn/blob/master/sherpa-ncnn/csrc/features.h>
+- <https://github.com/k2-fsa/sherpa-onnx>
+
+They use `kaldi-native-fbank` to compute fbank features for **real-time**
+speech recognition.
+
+# Python APIs
+
+First, please install `kaldi-native-fbank` by
+
+```bash
+git clone https://github.com/csukuangfj/kaldi-native-fbank
+cd kaldi-native-fbank
+python3 setup.py install
+```
+
+or use
+
+```bash
+pip install kaldi-native-fbank
+```
+
+To check that you have installed `kaldi-native-fbank` successfully, please use
+
+```
+python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
+```
+
+which should print the version you have installed.
+
+Please refer to
+<https://github.com/csukuangfj/kaldi-native-fbank/blob/master/kaldi-native-fbank/python/tests/test_online_fbank.py>
+for usage examples.
+
+For easier reference, we post the above file below:
+
+```python3
+#!/usr/bin/env python3
+
+import sys
+
+try:
+ import kaldifeat
+except:
+ print("Please install kaldifeat first")
+ sys.exit(0)
+
+import kaldi_native_fbank as knf
+import torch
+
+
+def main():
+ sampling_rate = 16000
+ samples = torch.randn(16000 * 10)
+
+ opts = kaldifeat.FbankOptions()
+ opts.frame_opts.dither = 0
+ opts.mel_opts.num_bins = 80
+ opts.frame_opts.snip_edges = False
+ opts.mel_opts.debug_mel = False
+
+ online_fbank = kaldifeat.OnlineFbank(opts)
+
+ online_fbank.accept_waveform(sampling_rate, samples)
+
+ opts = knf.FbankOptions()
+ opts.frame_opts.dither = 0
+ opts.mel_opts.num_bins = 80
+ opts.frame_opts.snip_edges = False
+ opts.mel_opts.debug_mel = False
+
+ fbank = knf.OnlineFbank(opts)
+ fbank.accept_waveform(sampling_rate, samples.tolist())
+
+ assert online_fbank.num_frames_ready == fbank.num_frames_ready
+ for i in range(fbank.num_frames_ready):
+ f1 = online_fbank.get_frame(i)
+ f2 = torch.from_numpy(fbank.get_frame(i))
+ assert torch.allclose(f1, f2, atol=1e-3), (i, (f1 - f2).abs().max())
+
+
+if __name__ == "__main__":
+ torch.manual_seed(20220825)
+ main()
+ print("success")
+```
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake
new file mode 100644
index 0000000..98cdf6c
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake
@@ -0,0 +1,916 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FetchContent
+------------------
+
+.. only:: html
+
+ .. contents::
+
+Overview
+^^^^^^^^
+
+This module enables populating content at configure time via any method
+supported by the :module:`ExternalProject` module. Whereas
+:command:`ExternalProject_Add` downloads at build time, the
+``FetchContent`` module makes content available immediately, allowing the
+configure step to use the content in commands like :command:`add_subdirectory`,
+:command:`include` or :command:`file` operations.
+
+Content population details would normally be defined separately from the
+command that performs the actual population. Projects should also
+check whether the content has already been populated somewhere else in the
+project hierarchy. Typical usage would look something like this:
+
+.. code-block:: cmake
+
+ FetchContent_Declare(
+ googletest
+ GIT_REPOSITORY https://github.com/google/googletest.git
+ GIT_TAG release-1.8.0
+ )
+
+ FetchContent_GetProperties(googletest)
+ if(NOT googletest_POPULATED)
+ FetchContent_Populate(googletest)
+ add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
+ endif()
+
+When using the above pattern with a hierarchical project arrangement,
+projects at higher levels in the hierarchy are able to define or override
+the population details of content specified anywhere lower in the project
+hierarchy. The ability to detect whether content has already been
+populated ensures that even if multiple child projects want certain content
+to be available, the first one to populate it wins. The other child project
+can simply make use of the already available content instead of repeating
+the population for itself. See the
+:ref:`Examples <fetch-content-examples>` section which demonstrates
+this scenario.
+
+The ``FetchContent`` module also supports defining and populating
+content in a single call, with no check for whether the content has been
+populated elsewhere in the project already. This is a more low level
+operation and would not normally be the way the module is used, but it is
+sometimes useful as part of implementing some higher level feature or to
+populate some content in CMake's script mode.
+
+
+Declaring Content Details
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_Declare
+
+ .. code-block:: cmake
+
+ FetchContent_Declare(<name> <contentOptions>...)
+
+ The ``FetchContent_Declare()`` function records the options that describe
+ how to populate the specified content, but if such details have already
+ been recorded earlier in this project (regardless of where in the project
+ hierarchy), this and all later calls for the same content ``<name>`` are
+ ignored. This "first to record, wins" approach is what allows hierarchical
+ projects to have parent projects override content details of child projects.
+
+ The content ``<name>`` can be any string without spaces, but good practice
+ would be to use only letters, numbers and underscores. The name will be
+ treated case-insensitively and it should be obvious for the content it
+ represents, often being the name of the child project or the value given
+ to its top level :command:`project` command (if it is a CMake project).
+ For well-known public projects, the name should generally be the official
+ name of the project. Choosing an unusual name makes it unlikely that other
+ projects needing that same content will use the same name, leading to
+ the content being populated multiple times.
+
+ The ``<contentOptions>`` can be any of the download or update/patch options
+ that the :command:`ExternalProject_Add` command understands. The configure,
+ build, install and test steps are explicitly disabled and therefore options
+ related to them will be ignored. In most cases, ``<contentOptions>`` will
+ just be a couple of options defining the download method and method-specific
+ details like a commit tag or archive hash. For example:
+
+ .. code-block:: cmake
+
+ FetchContent_Declare(
+ googletest
+ GIT_REPOSITORY https://github.com/google/googletest.git
+ GIT_TAG release-1.8.0
+ )
+
+ FetchContent_Declare(
+ myCompanyIcons
+ URL https://intranet.mycompany.com/assets/iconset_1.12.tar.gz
+ URL_HASH MD5=5588a7b18261c20068beabfb4f530b87
+ )
+
+ FetchContent_Declare(
+ myCompanyCertificates
+ SVN_REPOSITORY svn+ssh://svn.mycompany.com/srv/svn/trunk/certs
+ SVN_REVISION -r12345
+ )
+
+Populating The Content
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_Populate
+
+ .. code-block:: cmake
+
+ FetchContent_Populate( <name> )
+
+ In most cases, the only argument given to ``FetchContent_Populate()`` is the
+ ``<name>``. When used this way, the command assumes the content details have
+ been recorded by an earlier call to :command:`FetchContent_Declare`. The
+ details are stored in a global property, so they are unaffected by things
+ like variable or directory scope. Therefore, it doesn't matter where in the
+ project the details were previously declared, as long as they have been
+ declared before the call to ``FetchContent_Populate()``. Those saved details
+ are then used to construct a call to :command:`ExternalProject_Add` in a
+ private sub-build to perform the content population immediately. The
+ implementation of ``ExternalProject_Add()`` ensures that if the content has
+ already been populated in a previous CMake run, that content will be reused
+ rather than being populated again. For the common case where population
+ involves downloading content, the cost of the download is only paid once.
+
+ An internal global property records when a particular content population
+ request has been processed. If ``FetchContent_Populate()`` is called more
+ than once for the same content name within a configure run, the second call
+ will halt with an error. Projects can and should check whether content
+ population has already been processed with the
+ :command:`FetchContent_GetProperties` command before calling
+ ``FetchContent_Populate()``.
+
+ ``FetchContent_Populate()`` will set three variables in the scope of the
+ caller; ``<lcName>_POPULATED``, ``<lcName>_SOURCE_DIR`` and
+ ``<lcName>_BINARY_DIR``, where ``<lcName>`` is the lowercased ``<name>``.
+ ``<lcName>_POPULATED`` will always be set to ``True`` by the call.
+ ``<lcName>_SOURCE_DIR`` is the location where the
+ content can be found upon return (it will have already been populated), while
+ ``<lcName>_BINARY_DIR`` is a directory intended for use as a corresponding
+ build directory. The main use case for the two directory variables is to
+ call :command:`add_subdirectory` immediately after population, i.e.:
+
+ .. code-block:: cmake
+
+ FetchContent_Populate(FooBar ...)
+ add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
+
+ The values of the three variables can also be retrieved from anywhere in the
+ project hierarchy using the :command:`FetchContent_GetProperties` command.
+
+ A number of cache variables influence the behavior of all content population
+ performed using details saved from a :command:`FetchContent_Declare` call:
+
+ ``FETCHCONTENT_BASE_DIR``
+ In most cases, the saved details do not specify any options relating to the
+ directories to use for the internal sub-build, final source and build areas.
+ It is generally best to leave these decisions up to the ``FetchContent``
+ module to handle on the project's behalf. The ``FETCHCONTENT_BASE_DIR``
+ cache variable controls the point under which all content population
+ directories are collected, but in most cases developers would not need to
+ change this. The default location is ``${CMAKE_BINARY_DIR}/_deps``, but if
+ developers change this value, they should aim to keep the path short and
+ just below the top level of the build tree to avoid running into path
+ length problems on Windows.
+
+ ``FETCHCONTENT_QUIET``
+ The logging output during population can be quite verbose, making the
+ configure stage quite noisy. This cache option (``ON`` by default) hides
+ all population output unless an error is encountered. If experiencing
+ problems with hung downloads, temporarily switching this option off may
+ help diagnose which content population is causing the issue.
+
+ ``FETCHCONTENT_FULLY_DISCONNECTED``
+ When this option is enabled, no attempt is made to download or update
+ any content. It is assumed that all content has already been populated in
+ a previous run or the source directories have been pointed at existing
+ contents the developer has provided manually (using options described
+ further below). When the developer knows that no changes have been made to
+ any content details, turning this option ``ON`` can significantly speed up
+ the configure stage. It is ``OFF`` by default.
+
+ ``FETCHCONTENT_UPDATES_DISCONNECTED``
+ This is a less severe download/update control compared to
+ ``FETCHCONTENT_FULLY_DISCONNECTED``. Instead of bypassing all download and
+ update logic, the ``FETCHCONTENT_UPDATES_DISCONNECTED`` only disables the
+ update stage. Therefore, if content has not been downloaded previously,
+ it will still be downloaded when this option is enabled. This can speed up
+ the configure stage, but not as much as
+ ``FETCHCONTENT_FULLY_DISCONNECTED``. It is ``OFF`` by default.
+
+ In addition to the above cache variables, the following cache variables are
+ also defined for each content name (``<ucName>`` is the uppercased value of
+ ``<name>``):
+
+ ``FETCHCONTENT_SOURCE_DIR_<ucName>``
+ If this is set, no download or update steps are performed for the specified
+ content and the ``<lcName>_SOURCE_DIR`` variable returned to the caller is
+ pointed at this location. This gives developers a way to have a separate
+ checkout of the content that they can modify freely without interference
+ from the build. The build simply uses that existing source, but it still
+ defines ``<lcName>_BINARY_DIR`` to point inside its own build area.
+ Developers are strongly encouraged to use this mechanism rather than
+ editing the sources populated in the default location, as changes to
+ sources in the default location can be lost when content population details
+ are changed by the project.
+
+ ``FETCHCONTENT_UPDATES_DISCONNECTED_<ucName>``
+ This is the per-content equivalent of
+ ``FETCHCONTENT_UPDATES_DISCONNECTED``. If the global option or this option
+ is ``ON``, then updates will be disabled for the named content.
+ Disabling updates for individual content can be useful for content whose
+ details rarely change, while still leaving other frequently changing
+ content with updates enabled.
+
+
+ The ``FetchContent_Populate()`` command also supports a syntax allowing the
+ content details to be specified directly rather than using any saved
+ details. This is more low-level and use of this form is generally to be
+ avoided in favour of using saved content details as outlined above.
+ Nevertheless, in certain situations it can be useful to invoke the content
+ population as an isolated operation (typically as part of implementing some
+ other higher level feature or when using CMake in script mode):
+
+ .. code-block:: cmake
+
+ FetchContent_Populate( <name>
+ [QUIET]
+ [SUBBUILD_DIR <subBuildDir>]
+ [SOURCE_DIR <srcDir>]
+ [BINARY_DIR <binDir>]
+ ...
+ )
+
+ This form has a number of key differences to that where only ``<name>`` is
+ provided:
+
+ - All required population details are assumed to have been provided directly
+ in the call to ``FetchContent_Populate()``. Any saved details for
+ ``<name>`` are ignored.
+ - No check is made for whether content for ``<name>`` has already been
+ populated.
+ - No global property is set to record that the population has occurred.
+ - No global properties record the source or binary directories used for the
+ populated content.
+ - The ``FETCHCONTENT_FULLY_DISCONNECTED`` and
+ ``FETCHCONTENT_UPDATES_DISCONNECTED`` cache variables are ignored.
+
+ The ``<lcName>_SOURCE_DIR`` and ``<lcName>_BINARY_DIR`` variables are still
+ returned to the caller, but since these locations are not stored as global
+ properties when this form is used, they are only available to the calling
+ scope and below rather than the entire project hierarchy. No
+ ``<lcName>_POPULATED`` variable is set in the caller's scope with this form.
+
+ The supported options for ``FetchContent_Populate()`` are the same as those
+ for :command:`FetchContent_Declare()`. Those few options shown just
+ above are either specific to ``FetchContent_Populate()`` or their behavior is
+ slightly modified from how :command:`ExternalProject_Add` treats them.
+
+ ``QUIET``
+ The ``QUIET`` option can be given to hide the output associated with
+ populating the specified content. If the population fails, the output will
+ be shown regardless of whether this option was given or not so that the
+ cause of the failure can be diagnosed. The global ``FETCHCONTENT_QUIET``
+ cache variable has no effect on ``FetchContent_Populate()`` calls where the
+ content details are provided directly.
+
+ ``SUBBUILD_DIR``
+ The ``SUBBUILD_DIR`` argument can be provided to change the location of the
+ sub-build created to perform the population. The default value is
+ ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-subbuild`` and it would be unusual
+ to need to override this default. If a relative path is specified, it will
+ be interpreted as relative to :variable:`CMAKE_CURRENT_BINARY_DIR`.
+
+ ``SOURCE_DIR``, ``BINARY_DIR``
+ The ``SOURCE_DIR`` and ``BINARY_DIR`` arguments are supported by
+ :command:`ExternalProject_Add`, but different default values are used by
+ ``FetchContent_Populate()``. ``SOURCE_DIR`` defaults to
+ ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-src`` and ``BINARY_DIR`` defaults to
+ ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-build``. If a relative path is
+ specified, it will be interpreted as relative to
+ :variable:`CMAKE_CURRENT_BINARY_DIR`.
+
+ In addition to the above explicit options, any other unrecognized options are
+ passed through unmodified to :command:`ExternalProject_Add` to perform the
+ download, patch and update steps. The following options are explicitly
+ prohibited (they are disabled by the ``FetchContent_Populate()`` command):
+
+ - ``CONFIGURE_COMMAND``
+ - ``BUILD_COMMAND``
+ - ``INSTALL_COMMAND``
+ - ``TEST_COMMAND``
+
+ If using ``FetchContent_Populate()`` within CMake's script mode, be aware
+ that the implementation sets up a sub-build which therefore requires a CMake
+ generator and build tool to be available. If these cannot be found by
+ default, then the :variable:`CMAKE_GENERATOR` and/or
+ :variable:`CMAKE_MAKE_PROGRAM` variables will need to be set appropriately
+ on the command line invoking the script.
+
+
+Retrieve Population Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_GetProperties
+
+ When using saved content details, a call to :command:`FetchContent_Populate`
+ records information in global properties which can be queried at any time.
+ This information includes the source and binary directories associated with
+ the content and also whether or not the content population has been processed
+ during the current configure run.
+
+ .. code-block:: cmake
+
+ FetchContent_GetProperties( <name>
+ [SOURCE_DIR <srcDirVar>]
+ [BINARY_DIR <binDirVar>]
+ [POPULATED <doneVar>]
+ )
+
+ The ``SOURCE_DIR``, ``BINARY_DIR`` and ``POPULATED`` options can be used to
+ specify which properties should be retrieved. Each option accepts a value
+ which is the name of the variable in which to store that property. Most of
+ the time though, only ``<name>`` is given, in which case the call will then
+ set the same variables as a call to
+ :command:`FetchContent_Populate(name) <FetchContent_Populate>`. This allows
+ the following canonical pattern to be used, which ensures that the relevant
+ variables will always be defined regardless of whether or not the population
+ has been performed elsewhere in the project already:
+
+ .. code-block:: cmake
+
+ FetchContent_GetProperties(foobar)
+ if(NOT foobar_POPULATED)
+ FetchContent_Populate(foobar)
+
+ # Set any custom variables, etc. here, then
+ # populate the content as part of this build
+
+ add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
+ endif()
+
+ The above pattern allows other parts of the overall project hierarchy to
+ re-use the same content and ensure that it is only populated once.
+
+
+.. _`fetch-content-examples`:
+
+Examples
+^^^^^^^^
+
+Consider a project hierarchy where ``projA`` is the top level project and it
+depends on projects ``projB`` and ``projC``. Both ``projB`` and ``projC``
+can be built standalone and they also both depend on another project
+``projD``. For simplicity, this example will assume that all four projects
+are available on a company git server. The ``CMakeLists.txt`` of each project
+might have sections like the following:
+
+*projA*:
+
+.. code-block:: cmake
+
+ include(FetchContent)
+ FetchContent_Declare(
+ projB
+ GIT_REPOSITORY git@mycompany.com/git/projB.git
+ GIT_TAG 4a89dc7e24ff212a7b5167bef7ab079d
+ )
+ FetchContent_Declare(
+ projC
+ GIT_REPOSITORY git@mycompany.com/git/projC.git
+ GIT_TAG 4ad4016bd1d8d5412d135cf8ceea1bb9
+ )
+ FetchContent_Declare(
+ projD
+ GIT_REPOSITORY git@mycompany.com/git/projD.git
+ GIT_TAG origin/integrationBranch
+ )
+
+ FetchContent_GetProperties(projB)
+ if(NOT projb_POPULATED)
+ FetchContent_Populate(projB)
+ add_subdirectory(${projb_SOURCE_DIR} ${projb_BINARY_DIR})
+ endif()
+
+ FetchContent_GetProperties(projC)
+ if(NOT projc_POPULATED)
+ FetchContent_Populate(projC)
+ add_subdirectory(${projc_SOURCE_DIR} ${projc_BINARY_DIR})
+ endif()
+
+*projB*:
+
+.. code-block:: cmake
+
+ include(FetchContent)
+ FetchContent_Declare(
+ projD
+ GIT_REPOSITORY git@mycompany.com/git/projD.git
+ GIT_TAG 20b415f9034bbd2a2e8216e9a5c9e632
+ )
+
+ FetchContent_GetProperties(projD)
+ if(NOT projd_POPULATED)
+ FetchContent_Populate(projD)
+ add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
+ endif()
+
+
+*projC*:
+
+.. code-block:: cmake
+
+ include(FetchContent)
+ FetchContent_Declare(
+ projD
+ GIT_REPOSITORY git@mycompany.com/git/projD.git
+ GIT_TAG 7d9a17ad2c962aa13e2fbb8043fb6b8a
+ )
+
+ FetchContent_GetProperties(projD)
+ if(NOT projd_POPULATED)
+ FetchContent_Populate(projD)
+ add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
+ endif()
+
+A few key points should be noted in the above:
+
+- ``projB`` and ``projC`` define different content details for ``projD``,
+ but ``projA`` also defines a set of content details for ``projD`` and
+ because ``projA`` will define them first, the details from ``projB`` and
+ ``projC`` will not be used. The override details defined by ``projA``
+ are not required to match either of those from ``projB`` or ``projC``, but
+ it is up to the higher level project to ensure that the details it does
+ define still make sense for the child projects.
+- While ``projA`` defined content details for ``projD``, it did not need
+ to explicitly call ``FetchContent_Populate(projD)`` itself. Instead, it
+ leaves that to a child project to do (in this case it will be ``projB``
+ since it is added to the build ahead of ``projC``). If ``projA`` needed to
+ customize how the ``projD`` content was brought into the build as well
+ (e.g. define some CMake variables before calling
+ :command:`add_subdirectory` after populating), it would do the call to
+ ``FetchContent_Populate()``, etc. just as it did for the ``projB`` and
+ ``projC`` content. For higher level projects, it is usually enough to
+ just define the override content details and leave the actual population
+ to the child projects. This saves repeating the same thing at each level
+ of the project hierarchy unnecessarily.
+- Even though ``projA`` is the top level project in this example, it still
+ checks whether ``projB`` and ``projC`` have already been populated before
+ going ahead to do those populations. This makes ``projA`` able to be more
+ easily incorporated as a child of some other higher level project in the
+ future if required. Always protect a call to
+ :command:`FetchContent_Populate` with a check to
+ :command:`FetchContent_GetProperties`, even in what may be considered a top
+ level project at the time.
+
+
+The following example demonstrates how one might download and unpack a
+firmware tarball using CMake's :manual:`script mode <cmake(1)>`. The call to
+:command:`FetchContent_Populate` specifies all the content details and the
+unpacked firmware will be placed in a ``firmware`` directory below the
+current working directory.
+
+*getFirmware.cmake*:
+
+.. code-block:: cmake
+
+ # NOTE: Intended to be run in script mode with cmake -P
+ include(FetchContent)
+ FetchContent_Populate(
+ firmware
+ URL https://mycompany.com/assets/firmware-1.23-arm.tar.gz
+ URL_HASH MD5=68247684da89b608d466253762b0ff11
+ SOURCE_DIR firmware
+ )
+
+#]=======================================================================]
+
+# Path of the "FetchContent" directory that sits next to this module file.
+set(__FetchContent_privateDir "${CMAKE_CURRENT_LIST_DIR}/FetchContent")
+
+#=======================================================================
+# Recording and retrieving content details for later population
+#=======================================================================
+
+# Internal use, projects must not call this directly. It is
+# intended for use by FetchContent_Declare() only.
+#
+# Sets a content-specific global property (not meant for use
+# outside of functions defined here in this file) which can later
+# be retrieved using __FetchContent_getSavedDetails() with just the
+# same content name. If there is already a value stored in the
+# property, it is left unchanged and this call has no effect.
+# This allows parent projects to define the content details,
+# overriding anything a child project may try to set (properties
+# are not cached between runs, so the first thing to set it in a
+# build will be in control).
+function(__FetchContent_declareDetails contentName)
+  # Content names are case-insensitive, so the property is keyed on lowercase.
+  string(TOLOWER ${contentName} lcName)
+  set(detailsProp "_FetchContent_${lcName}_savedDetails")
+  get_property(isRecorded GLOBAL PROPERTY ${detailsProp} DEFINED)
+  if(isRecorded)
+    return()  # The first declaration wins; later ones are ignored.
+  endif()
+  define_property(GLOBAL PROPERTY ${detailsProp}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${detailsProp} ${ARGN})
+endfunction()
+
+
+# Internal use, projects must not call this directly. It is
+# intended for use by the FetchContent_Populate() function.
+#
+# Looks up the details that an earlier __FetchContent_declareDetails()
+# call stored for contentName and returns them in the variable outVar.
+function(__FetchContent_getSavedDetails contentName outVar)
+  string(TOLOWER ${contentName} lcName)
+  set(detailsProp "_FetchContent_${lcName}_savedDetails")
+  get_property(isRecorded GLOBAL PROPERTY ${detailsProp} DEFINED)
+  if(isRecorded)
+    get_property(savedDetails GLOBAL PROPERTY ${detailsProp})
+    set(${outVar} "${savedDetails}" PARENT_SCOPE)
+  else()
+    # Nothing was declared under this name, so there is nothing to return.
+    message(FATAL_ERROR "No content details recorded for ${contentName}")
+  endif()
+endfunction()
+
+
+# Saves population details of the content, sets defaults for the
+# SOURCE_DIR and BINARY_DIR.
+function(FetchContent_Declare contentName)
+ # Only SVN_REPOSITORY is parsed out below; all other arguments are forwarded.
+ set(options "")
+ set(oneValueArgs SVN_REPOSITORY)
+ set(multiValueArgs "")
+
+ cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ unset(srcDirSuffix)
+ unset(svnRepoArgs)
+ if(ARG_SVN_REPOSITORY)
+ # Add a hash of the svn repository URL to the source dir. This works
+ # around the problem where if the URL changes, the download would
+ # fail because it tries to checkout/update rather than switch the
+ # old URL to the new one. We limit the hash to the first 7 characters
+ # so that the source path doesn't get overly long (which can be a
+ # problem on windows due to path length limits).
+ string(SHA1 urlSHA ${ARG_SVN_REPOSITORY})
+ string(SUBSTRING ${urlSHA} 0 7 urlSHA)
+ set(srcDirSuffix "-${urlSHA}")
+ set(svnRepoArgs SVN_REPOSITORY ${ARG_SVN_REPOSITORY})
+ endif()
+ # Details are stored under the lowercased name: defaults first, caller options last.
+ string(TOLOWER ${contentName} contentNameLower)
+ __FetchContent_declareDetails(
+ ${contentNameLower}
+ SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src${srcDirSuffix}"
+ BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build"
+ ${svnRepoArgs}
+ # List these last so they can override things we set above
+ ${ARG_UNPARSED_ARGUMENTS}
+ )
+
+endfunction()
+
+
+#=======================================================================
+# Set/get whether the specified content has been populated yet.
+# The setter also records the source and binary dirs used.
+#=======================================================================
+
+# Internal use, projects must not call this directly. It is meant to be
+# called by FetchContent_Populate() to note that population has been
+# requested for contentName and to remember the directories involved.
+#
+# Three global properties are written, each named with the prefix
+# _FetchContent_<lowercased contentName>:
+#   _sourceDir  - receives sourceDir
+#   _binaryDir  - receives binaryDir
+#   _populated  - set to True
+function(__FetchContent_setPopulated contentName sourceDir binaryDir)
+
+  string(TOLOWER ${contentName} lcName)
+  set(keyBase "_FetchContent_${lcName}")
+
+  # Define every bookkeeping property first; all of them share the same
+  # documentation strings.
+  foreach(field IN ITEMS sourceDir binaryDir populated)
+    define_property(GLOBAL PROPERTY ${keyBase}_${field}
+      BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+      FULL_DOCS "Details used by FetchContent_Populate() for ${contentName}"
+    )
+  endforeach()
+
+  # Remember the source and binary directories given by the caller.
+  set_property(GLOBAL PROPERTY ${keyBase}_sourceDir ${sourceDir})
+  set_property(GLOBAL PROPERTY ${keyBase}_binaryDir ${binaryDir})
+
+  # Record that population has been processed for this content.
+  set_property(GLOBAL PROPERTY ${keyBase}_populated True)
+
+endfunction()
+
+
+# Reports what has been recorded about a piece of content by setting
+# variables in the caller's scope. With only <contentName> given, the
+# variables <lcName>_SOURCE_DIR, <lcName>_BINARY_DIR and <lcName>_POPULATED
+# are used (<lcName> being the lowercased name); otherwise only the
+# properties selected with the SOURCE_DIR, BINARY_DIR and/or POPULATED
+# options are reported, each in the variable whose name follows the option.
+#
+# Projects are expected to call this before FetchContent_Populate() to
+# decide whether the content still needs to be populated and pulled into
+# the build (for example with add_subdirectory()).
+function(FetchContent_GetProperties contentName)
+
+  string(TOLOWER ${contentName} lcName)
+  set(keyBase "_FetchContent_${lcName}")
+
+  # Each keyword takes exactly one value: the name of an output variable.
+  cmake_parse_arguments(ARG "" "SOURCE_DIR;BINARY_DIR;POPULATED" "" ${ARGN})
+
+  if(NOT (ARG_SOURCE_DIR OR ARG_BINARY_DIR OR ARG_POPULATED))
+    # Nothing specific was asked for, so report everything using the
+    # default variable names.
+    set(ARG_SOURCE_DIR ${lcName}_SOURCE_DIR)
+    set(ARG_BINARY_DIR ${lcName}_BINARY_DIR)
+    set(ARG_POPULATED ${lcName}_POPULATED)
+  endif()
+
+  # A directory variable is only set when a truthy value has been
+  # recorded; otherwise the caller's variable is left untouched.
+  if(ARG_SOURCE_DIR)
+    get_property(recordedSrc GLOBAL PROPERTY ${keyBase}_sourceDir)
+    if(recordedSrc)
+      set(${ARG_SOURCE_DIR} ${recordedSrc} PARENT_SCOPE)
+    endif()
+  endif()
+
+  if(ARG_BINARY_DIR)
+    get_property(recordedBin GLOBAL PROPERTY ${keyBase}_binaryDir)
+    if(recordedBin)
+      set(${ARG_BINARY_DIR} ${recordedBin} PARENT_SCOPE)
+    endif()
+  endif()
+
+  # The populated flag reflects whether the marker property is defined at
+  # all, not the value stored in it, and is always reported when requested.
+  if(ARG_POPULATED)
+    get_property(isPopulated GLOBAL PROPERTY ${keyBase}_populated DEFINED)
+    set(${ARG_POPULATED} ${isPopulated} PARENT_SCOPE)
+  endif()
+
+endfunction()
+
+
+#=======================================================================
+# Performing the population
+#=======================================================================
+
+# The value of contentName will always have been lowercased by the caller.
+# All other arguments are assumed to be options that are understood by
+# ExternalProject_Add(), except for QUIET and SUBBUILD_DIR.
+function(__FetchContent_directPopulate contentName)
+ # Generates a small sub-build project for contentName, then configures and builds it.
+ set(options
+ QUIET # sub-build output is captured and only printed if a step fails
+ )
+ set(oneValueArgs
+ SUBBUILD_DIR
+ SOURCE_DIR
+ BINARY_DIR
+ # Prevent the following from being passed through via ARG_EXTRA
+ CONFIGURE_COMMAND
+ BUILD_COMMAND
+ INSTALL_COMMAND
+ TEST_COMMAND
+ )
+ set(multiValueArgs "")
+
+ cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) # leftovers end up in ARG_UNPARSED_ARGUMENTS
+
+ if(NOT ARG_SUBBUILD_DIR)
+ message(FATAL_ERROR "Internal error: SUBBUILD_DIR not set")
+ elseif(NOT IS_ABSOLUTE "${ARG_SUBBUILD_DIR}") # relative paths are anchored at the current binary dir
+ set(ARG_SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SUBBUILD_DIR}")
+ endif()
+
+ if(NOT ARG_SOURCE_DIR)
+ message(FATAL_ERROR "Internal error: SOURCE_DIR not set")
+ elseif(NOT IS_ABSOLUTE "${ARG_SOURCE_DIR}")
+ set(ARG_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SOURCE_DIR}")
+ endif()
+
+ if(NOT ARG_BINARY_DIR)
+ message(FATAL_ERROR "Internal error: BINARY_DIR not set")
+ elseif(NOT IS_ABSOLUTE "${ARG_BINARY_DIR}")
+ set(ARG_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_BINARY_DIR}")
+ endif()
+
+ # Ensure the caller can know where to find the source and build directories
+ # with some convenient variables. Doing this here ensures the caller sees
+ # the correct result in the case where the default values are overridden by
+ # the content details set by the project.
+ set(${contentName}_SOURCE_DIR "${ARG_SOURCE_DIR}" PARENT_SCOPE)
+ set(${contentName}_BINARY_DIR "${ARG_BINARY_DIR}" PARENT_SCOPE)
+
+ # The unparsed arguments may contain spaces, so build up ARG_EXTRA
+ # in such a way that it correctly substitutes into the generated
+ # CMakeLists.txt file with each argument quoted.
+ unset(ARG_EXTRA)
+ foreach(arg IN LISTS ARG_UNPARSED_ARGUMENTS)
+ set(ARG_EXTRA "${ARG_EXTRA} \"${arg}\"")
+ endforeach()
+
+ # Hide output if requested, but save it to a variable in case there's an
+ # error so we can show the output upon failure. When not quiet, don't
+ # capture the output to a variable because the user may want to see the
+ # output as it happens (e.g. progress during long downloads). Combine both
+ # stdout and stderr in the one capture variable so the output stays in order.
+ if (ARG_QUIET)
+ set(outputOptions
+ OUTPUT_VARIABLE capturedOutput
+ ERROR_VARIABLE capturedOutput
+ )
+ else()
+ set(capturedOutput)
+ set(outputOptions)
+ message(STATUS "Populating ${contentName}")
+ endif()
+
+ if(CMAKE_GENERATOR)
+ set(generatorOpts "-G${CMAKE_GENERATOR}")
+ if(CMAKE_GENERATOR_PLATFORM)
+ list(APPEND generatorOpts "-A${CMAKE_GENERATOR_PLATFORM}")
+ endif()
+ if(CMAKE_GENERATOR_TOOLSET)
+ list(APPEND generatorOpts "-T${CMAKE_GENERATOR_TOOLSET}")
+ endif()
+
+ if(CMAKE_MAKE_PROGRAM)
+ list(APPEND generatorOpts "-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}")
+ endif()
+
+ else()
+ # Likely we've been invoked via CMake's script mode where no
+ # generator is set (and hence CMAKE_MAKE_PROGRAM could not be
+ # trusted even if provided). We will have to rely on being
+ # able to find the default generator and build tool.
+ unset(generatorOpts)
+ endif()
+
+ # Create and build a separate CMake project to carry out the population.
+ # If we've already previously done these steps, they will not cause
+ # anything to be updated, so extra rebuilds of the project won't occur.
+ # Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project
+ # has this set to something not findable on the PATH.
+ configure_file("${__FetchContent_privateDir}/CMakeLists.cmake.in"
+ "${ARG_SUBBUILD_DIR}/CMakeLists.txt") # substitutes the variables set above into the template
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} ${generatorOpts} . # step 1: configure the generated sub-build
+ RESULT_VARIABLE result
+ ${outputOptions}
+ WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
+ )
+ if(result) # true for a non-zero exit code or an error string
+ if(capturedOutput)
+ message("${capturedOutput}")
+ endif()
+ message(FATAL_ERROR "CMake step for ${contentName} failed: ${result}")
+ endif()
+ execute_process(
+ COMMAND ${CMAKE_COMMAND} --build . # step 2: build it, which carries out the population
+ RESULT_VARIABLE result
+ ${outputOptions}
+ WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
+ )
+ if(result)
+ if(capturedOutput)
+ message("${capturedOutput}")
+ endif()
+ message(FATAL_ERROR "Build step for ${contentName} failed: ${result}")
+ endif()
+
+endfunction()
+
+
+option(FETCHCONTENT_FULLY_DISCONNECTED "Disables all attempts to download or update content and assumes source dirs already exist") # OFF by default (no initial value given)
+option(FETCHCONTENT_UPDATES_DISCONNECTED "Enables UPDATE_DISCONNECTED behavior for all content population") # OFF by default (no initial value given)
+option(FETCHCONTENT_QUIET "Enables QUIET option for all content population" ON)
+set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/_deps" CACHE PATH "Directory under which to collect all populated content") # no FORCE: an existing cache value is kept
+
+# Populate the specified content using details stored from
+# an earlier call to FetchContent_Declare().
+function(FetchContent_Populate contentName)
+ # Two forms: with extra arguments the details come from the call itself; otherwise from FetchContent_Declare().
+ if(NOT contentName)
+ message(FATAL_ERROR "Empty contentName not allowed for FetchContent_Populate()")
+ endif()
+
+ string(TOLOWER ${contentName} contentNameLower)
+
+ if(ARGN)
+ # This is the direct population form with details fully specified
+ # as part of the call, so we already have everything we need
+ __FetchContent_directPopulate(
+ ${contentNameLower}
+ SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-subbuild"
+ SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-src"
+ BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-build"
+ ${ARGN} # Could override any of the above ..._DIR variables
+ )
+
+ # Pass source and binary dir variables back to the caller
+ set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
+ set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)
+
+ # Don't set global properties, or record that we did this population, since
+ # this was a direct call outside of the normal declared details form.
+ # We only want to save values in the global properties for content that
+ # honours the hierarchical details mechanism so that projects are not
+ # robbed of the ability to override details set in nested projects.
+ return()
+ endif()
+
+ # No details provided, so assume they were saved from an earlier call
+ # to FetchContent_Declare(). Do a check that we haven't already
+ # populated this content before in case the caller forgot to check.
+ FetchContent_GetProperties(${contentName})
+ if(${contentNameLower}_POPULATED)
+ message(FATAL_ERROR "Content ${contentName} already populated in ${${contentNameLower}_SOURCE_DIR}")
+ endif()
+
+ string(TOUPPER ${contentName} contentNameUpper)
+ set(FETCHCONTENT_SOURCE_DIR_${contentNameUpper}
+ "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}"
+ CACHE PATH "When not empty, overrides where to find pre-populated content for ${contentName}")
+
+ if(FETCHCONTENT_SOURCE_DIR_${contentNameUpper})
+ # The source directory has been explicitly provided in the cache,
+ # so no population is required
+ set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}")
+ set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")
+
+ elseif(FETCHCONTENT_FULLY_DISCONNECTED)
+ # Bypass population and assume source is already there from a previous run
+ set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src")
+ set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")
+
+ else()
+ # Support both a global "disconnect all updates" and a per-content
+ # update test (either one being set disables updates for this content).
+ option(FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper}
+ "Enables UPDATE_DISCONNECTED behavior just for population of ${contentName}")
+ if(FETCHCONTENT_UPDATES_DISCONNECTED OR
+ FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper})
+ set(disconnectUpdates True)
+ else()
+ set(disconnectUpdates False)
+ endif()
+
+ if(FETCHCONTENT_QUIET)
+ set(quietFlag QUIET)
+ else()
+ unset(quietFlag)
+ endif()
+
+ __FetchContent_getSavedDetails(${contentName} contentDetails)
+ if("${contentDetails}" STREQUAL "")
+ message(FATAL_ERROR "No details have been set for content: ${contentName}")
+ endif()
+
+ __FetchContent_directPopulate(
+ ${contentNameLower}
+ ${quietFlag}
+ UPDATE_DISCONNECTED ${disconnectUpdates}
+ SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-subbuild"
+ SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src"
+ BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build"
+ # Put the saved details last so they can override any of
+ # the options we set above (this can include SOURCE_DIR or
+ # BINARY_DIR)
+ ${contentDetails}
+ )
+ endif()
+
+ __FetchContent_setPopulated(
+ ${contentName}
+ ${${contentNameLower}_SOURCE_DIR}
+ ${${contentNameLower}_BINARY_DIR}
+ )
+
+ # Pass variables back to the caller. The variables passed back here
+ # must match what FetchContent_GetProperties() sets when it is called
+ # with just the content name.
+ set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
+ set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)
+ set(${contentNameLower}_POPULATED True PARENT_SCOPE)
+
+endfunction()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in
new file mode 100644
index 0000000..9a7a771
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in
@@ -0,0 +1,21 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+cmake_minimum_required(VERSION ${CMAKE_VERSION}) # value is substituted when this template is configured
+
+# We name the project and the target for the ExternalProject_Add() call
+# to something that will highlight to the user what we are working on if
+# something goes wrong and an error message is produced.
+
+project(${contentName}-populate NONE)
+
+include(ExternalProject)
+ExternalProject_Add(${contentName}-populate
+ ${ARG_EXTRA} # caller-supplied options, expected to be individually quoted already
+ SOURCE_DIR "${ARG_SOURCE_DIR}"
+ BINARY_DIR "${ARG_BINARY_DIR}"
+ CONFIGURE_COMMAND "" # the configure/build/install/test steps are intentionally empty
+ BUILD_COMMAND ""
+ INSTALL_COMMAND ""
+ TEST_COMMAND ""
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md
new file mode 100644
index 0000000..c8d275f
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md
@@ -0,0 +1,5 @@
+
+## FetchContent
+
+`FetchContent.cmake` and `FetchContent/CMakeLists.cmake.in`
+are copied from `cmake/3.11.0/share/cmake-3.11/Modules`.
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py
new file mode 100644
index 0000000..f3ca874
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
+
+import glob
+import os
+import platform
+import shutil
+import sys
+from pathlib import Path
+
+import setuptools
+from setuptools.command.build_ext import build_ext
+
+
+def is_for_pypi():
+    """Return True when KALDI_NATIVE_FBANK_IS_FOR_PYPI is set (to any value)."""
+    return "KALDI_NATIVE_FBANK_IS_FOR_PYPI" in os.environ
+
+
+def is_macos():  # platform.system() reports "Darwin" on macOS
+ return platform.system() == "Darwin"
+
+
+def is_windows():  # True when platform.system() reports "Windows"
+ return platform.system() == "Windows"
+
+
+try:
+ from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
+
+ class bdist_wheel(_bdist_wheel):
+ def finalize_options(self):
+ _bdist_wheel.finalize_options(self)
+ # PyPI builds (except on macOS) are marked pure, giving a wheel named like
+ # <package>-xxx-pyxx-none-any.whl
+ if is_for_pypi() and not is_macos():
+ self.root_is_pure = True
+ else:
+ # Otherwise the wheel gets a platform-specific name, e.g. one ending with
+ # -linux_x86_64.whl
+ self.root_is_pure = False
+
+
+except ImportError:
+ bdist_wheel = None
+
+
+def cmake_extension(name, *args, **kwargs) -> setuptools.Extension:
+    # The source list stays empty; the language is always forced to C++.
+    kwargs.update(language="c++")
+    return setuptools.Extension(name, [], *args, **kwargs)
+
+
+class BuildExtension(build_ext):
+    """build_ext command that configures, builds and installs via CMake."""
+
+    def build_extension(self, ext: setuptools.extension.Extension):
+        """Configure, build and install with CMake; raise Exception on failure."""
+        # e.g. build/temp.linux-x86_64-3.8 and build/lib.linux-x86_64-3.8
+        os.makedirs(self.build_temp, exist_ok=True)
+        os.makedirs(self.build_lib, exist_ok=True)
+
+        install_dir = Path(self.build_lib).resolve() / "kaldi_native_fbank"
+        kaldi_native_fbank_dir = Path(__file__).parent.parent.resolve()
+
+        cmake_args = os.environ.get("KALDI_NATIVE_FBANK_CMAKE_ARGS", "")
+        make_args = os.environ.get("KALDI_NATIVE_FBANK_MAKE_ARGS", "")
+        system_make_args = os.environ.get("MAKEFLAGS", "")
+
+        if cmake_args == "":
+            cmake_args = "-DCMAKE_BUILD_TYPE=Release"
+
+        extra_cmake_args = f" -DCMAKE_INSTALL_PREFIX={install_dir} "
+        extra_cmake_args += " -DKALDI_NATIVE_FBANK_BUILD_TESTS=OFF "
+
+        if "PYTHON_EXECUTABLE" not in cmake_args:
+            print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
+            cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"
+
+        cmake_args += extra_cmake_args
+
+        if is_windows():
+            build_cmd = f"""
+         cmake {cmake_args} -B {self.build_temp} -S {kaldi_native_fbank_dir}
+         cmake --build {self.build_temp} --target install --config Release -- -m
+            """
+            print(f"build command is:\n{build_cmd}")
+            ret = os.system(
+                f"cmake {cmake_args} -B {self.build_temp} -S {kaldi_native_fbank_dir}"
+            )
+            if ret != 0:
+                raise Exception("Failed to configure kaldi_native_fbank")
+
+            ret = os.system(
+                f"cmake --build {self.build_temp} --target install --config Release -- -m"
+            )
+            if ret != 0:
+                raise Exception("Failed to install kaldi_native_fbank")
+        else:
+            if make_args == "" and system_make_args == "":
+                print("For fast compilation, run:")
+                print(
+                    'export KALDI_NATIVE_FBANK_MAKE_ARGS="-j"; python setup.py install'
+                )
+
+            # "set -e" stops the script at the first failing step; without it
+            # a failed cd/cmake was hidden behind the exit status of make.
+            build_cmd = f"""
+                set -e
+                cd {self.build_temp}
+                cmake {cmake_args} {kaldi_native_fbank_dir}
+                make {make_args} install
+            """
+            print(f"build command is:\n{build_cmd}")
+            ret = os.system(build_cmd)
+            if ret != 0:
+                raise Exception(
+                    "\nBuild kaldi-native-fbank failed. Please check the error message.\n"
+                    "You can ask for help by creating an issue on GitHub.\n"
+                    "\nClick:\n\thttps://github.com/csukuangfj/kaldi-native-fbank/issues/new\n"  # noqa
+                )
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake
new file mode 100644
index 0000000..a732dc7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake
@@ -0,0 +1,57 @@
+function(download_googltest)
+  # Download googletest 1.10.0 and add it to the build. For CMake < 3.11 use the
+  # bundled FetchContent in Modules/ next to this file (works as a subproject too).
+  if(CMAKE_VERSION VERSION_LESS 3.11)
+    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
+  endif()
+  include(FetchContent)
+  set(googletest_URL "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
+  set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
+
+  set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
+  set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
+  set(gtest_disable_pthreads ON CACHE BOOL "" FORCE)
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+
+  FetchContent_Declare(googletest
+    URL ${googletest_URL}
+    URL_HASH ${googletest_HASH}
+  )
+
+  FetchContent_GetProperties(googletest)
+  if(NOT googletest_POPULATED)
+    message(STATUS "Downloading googletest from ${googletest_URL}")
+    FetchContent_Populate(googletest)
+  endif()
+  message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
+  message(STATUS "googletest's binary dir is ${googletest_BINARY_DIR}")
+
+  if(APPLE)
+    set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
+  endif()
+  #[==[
+  -- Generating done
+    Policy CMP0042 is not set: MACOSX_RPATH is enabled by default. Run "cmake
+    --help-policy CMP0042" for policy details. Use the cmake_policy command to
+    set the policy and suppress this warning.
+
+    MACOSX_RPATH is not specified for the following targets:
+
+      gmock
+      gmock_main
+      gtest
+      gtest_main
+
+    This warning is for project developers. Use -Wno-dev to suppress it.
+  ]==]
+
+  add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
+
+  target_include_directories(gtest
+    INTERFACE
+      ${googletest_SOURCE_DIR}/googletest/include
+      ${googletest_SOURCE_DIR}/googlemock/include
+  )
+endfunction()
+
+download_googltest()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake
new file mode 100644
index 0000000..464f399
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake
@@ -0,0 +1,35 @@
+function(download_pybind11)
+  # Download pybind11 2.9.2 and add it to the build. For CMake < 3.11 use the
+  # bundled FetchContent in Modules/ next to this file (works as a subproject too).
+  if(CMAKE_VERSION VERSION_LESS 3.11)
+    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
+  endif()
+  include(FetchContent)
+  set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz")
+  set(pybind11_HASH "SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1")
+
+  # If you don't have access to the Internet, please download it to your
+  # local drive and modify the following line according to your needs.
+  if(EXISTS "/star-fj/fangjun/download/github/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL "file:///star-fj/fangjun/download/github/pybind11-2.9.2.tar.gz")
+  elseif(EXISTS "/Users/fangjun/Downloads/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL "file:///Users/fangjun/Downloads/pybind11-2.9.2.tar.gz")
+  elseif(EXISTS "/tmp/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL "file:///tmp/pybind11-2.9.2.tar.gz")
+  endif()
+
+  FetchContent_Declare(pybind11
+    URL ${pybind11_URL}
+    URL_HASH ${pybind11_HASH}
+  )
+
+  FetchContent_GetProperties(pybind11)
+  if(NOT pybind11_POPULATED)
+    message(STATUS "Downloading pybind11 from ${pybind11_URL}")
+    FetchContent_Populate(pybind11)
+  endif()
+  message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
+  add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
+endfunction()
+
+download_pybind11()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt
new file mode 100644
index 0000000..2037626
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_subdirectory(csrc)
+# The Python bindings are optional and controlled by KALDI_NATIVE_FBANK_BUILD_PYTHON.
+if(NOT KALDI_NATIVE_FBANK_BUILD_PYTHON)
+  message(STATUS "Disable building Python")
+else()
+  message(STATUS "Building Python")
+  add_subdirectory(python)
+endif()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt
new file mode 100644
index 0000000..bd61361
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library(csrc STATIC  # static library of the fbank feature-extraction sources
+  feature-fbank.cc
+  feature-functions.cc
+  feature-window.cc
+  fftsg.c
+  mel-computations.cc
+  online-feature.cc
+  rfft.cc)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak
new file mode 100644
index 0000000..6eb693d
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak
@@ -0,0 +1,93 @@
+
+include_directories(${PROJECT_SOURCE_DIR})
+set(sources
+ feature-fbank.cc
+ feature-functions.cc
+ feature-window.cc
+ fftsg.c
+ mel-computations.cc
+ online-feature.cc
+ rfft.cc
+)
+
+if(KALDI_NATIVE_FBANK_ENABLE_CHECK) # log.cc is only compiled when checks are enabled
+ list(APPEND sources log.cc)
+endif()
+
+add_library(kaldi-native-fbank-core ${sources})
+if(KALDI_NATIVE_FBANK_ENABLE_CHECK)
+ target_compile_definitions(kaldi-native-fbank-core PUBLIC KNF_ENABLE_CHECK=1)
+
+ if(KNF_HAVE_EXECINFO_H)
+ target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_EXECINFO_H=1)
+ endif()
+
+ if(KNF_HAVE_CXXABI_H)
+ target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_CXXABI_H=1)
+ endif()
+endif()
+
+# We are using std::call_once() in log.h, which requires us to link with -pthread
+if(NOT WIN32 AND KALDI_NATIVE_FBANK_ENABLE_CHECK)
+ target_link_libraries(kaldi-native-fbank-core -pthread)
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+ add_executable(test-online-fbank test-online-fbank.cc)
+ target_link_libraries(test-online-fbank kaldi-native-fbank-core)
+endif()
+
+function(kaldi_native_fbank_add_test source)
+  # Builds <source> into a gtest executable named after the file (without its
+  # extension) and registers it with CTest as "Test.<name>".
+  get_filename_component(test_target ${source} NAME_WE)
+
+  add_executable(${test_target} "${source}")
+  target_link_libraries(${test_target}
+    PRIVATE kaldi-native-fbank-core gtest gtest_main
+  )
+
+  add_test(
+    NAME "Test.${test_target}"
+    COMMAND $<TARGET_FILE:${test_target}>
+  )
+endfunction()
+
+# Please keep the test source files sorted alphabetically.
+set(test_srcs
+ # test-online-feature.cc (currently disabled)
+ test-log.cc
+ test-rfft.cc
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS) # one gtest executable and CTest entry per source
+ foreach(source IN LISTS test_srcs)
+ kaldi_native_fbank_add_test(${source})
+ endforeach()
+endif()
+
+install(TARGETS kaldi-native-fbank-core
+ DESTINATION lib
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+ install(TARGETS test-online-fbank
+ DESTINATION bin
+ )
+endif()
+
+# Mirror the public headers into the build tree and install them.
+# Note: file(MAKE_DIRECTORY) has no DESTINATION keyword; the old call also
+# created a stray directory literally named "DESTINATION".
+file(MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc")
+file(GLOB_RECURSE all_headers *.h)
+
+file(COPY
+  ${all_headers}
+  DESTINATION
+    ${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc
+)
+
+install(FILES ${all_headers}
+  DESTINATION include/kaldi-native-fbank/csrc
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc
new file mode 100644
index 0000000..068f495
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
+//
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-functions.h"
+
+namespace knf {
+
+// In-place element-wise square root of in_out[0..n).
+static void Sqrt(float *in_out, int32_t n) {
+  float *const end = in_out + n;
+  for (float *p = in_out; p != end; ++p) *p = std::sqrt(*p);
+}
+
+// Streams the text produced by FbankOptions::ToString().
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
+  return os << opts.ToString();
+}
+
+// Caches log(energy_floor) when a positive floor is configured and
+// pre-builds the mel filterbank for the default warp factor 1.0.
+FbankComputer::FbankComputer(const FbankOptions &opts)
+    : opts_(opts),
+      log_energy_floor_(0.0f),  // only read when opts.energy_floor > 0
+      rfft_(opts.frame_opts.PaddedWindowSize()) {
+  if (opts.energy_floor > 0.0f) log_energy_floor_ = std::log(opts.energy_floor);
+  // Needed for VTLN warping factor 1.0 in any case; this call caches it.
+  GetMelBanks(1.0f);
+}
+
+// Frees every MelBanks instance cached in mel_banks_.
+FbankComputer::~FbankComputer() {
+  for (auto &entry : mel_banks_) delete entry.second;
+}
+
+// Returns the MelBanks for vtln_warp, building and caching it on first use.
+// The returned object remains owned by this FbankComputer.
+const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
+  auto cached = mel_banks_.find(vtln_warp);
+  if (cached != mel_banks_.end()) {
+    return cached->second;
+  }
+
+  MelBanks *created =
+      new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
+  mel_banks_[vtln_warp] = created;
+  return created;
+}
+
+void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
+                            std::vector<float> *signal_frame, float *feature) {
+  // Writes Dim() values to feature; *signal_frame is overwritten as workspace.
+  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));
+  const float kEpsilon = std::numeric_limits<float>::epsilon();
+
+  KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());
+  float *frame = signal_frame->data();
+
+  // Energy of the windowed frame replaces the raw energy when requested.
+  if (opts_.use_energy && !opts_.raw_energy) {
+    const float energy = InnerProduct(frame, frame, signal_frame->size());
+    signal_raw_log_energy = std::log(std::max(energy, kEpsilon));
+  }
+
+  // In-place real FFT, then conversion to a power spectrum.
+  rfft_.Compute(frame);
+  ComputePowerSpectrum(signal_frame);
+
+  // Use magnitude instead of power if requested.
+  if (!opts_.use_power) {
+    Sqrt(frame, signal_frame->size() / 2 + 1);
+  }
+
+  // The energy slot comes first unless htk_compat moves it to the end.
+  const bool energy_first = opts_.use_energy && !opts_.htk_compat;
+  float *mel_energies = feature + (energy_first ? 1 : 0);  // num_bins values
+  mel_banks.Compute(frame, mel_energies);  // sum with the mel filter banks
+
+  if (opts_.use_log_fbank) {
+    // Clamp to epsilon first to avoid taking the log of zero.
+    const int32_t num_bins = opts_.mel_opts.num_bins;
+    for (int32_t bin = 0; bin != num_bins; ++bin) {
+      mel_energies[bin] = std::log(std::max(mel_energies[bin], kEpsilon));
+    }
+  }
+
+  if (!opts_.use_energy) return;
+
+  // Floor the energy if configured, then store it first (or last for HTK).
+  if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
+    signal_raw_log_energy = log_energy_floor_;
+  }
+  const int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
+  feature[energy_index] = signal_raw_log_energy;
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h
new file mode 100644
index 0000000..0786aad
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h
@@ -0,0 +1,134 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+struct FbankOptions {
+ FrameExtractionOptions frame_opts;
+ MelBanksOptions mel_opts;
+ // append an extra dimension with energy to the filter banks
+ bool use_energy = false;
+ float energy_floor = 0.0f; // active iff use_energy==true; values <= 0 disable the floor
+
+ // If true, compute log_energy before preemphasis and windowing
+ // If false, compute log_energy after preemphasis and windowing
+ bool raw_energy = true; // active iff use_energy==true
+
+ // If true, put energy last (if using energy)
+ // If false, put energy first
+ bool htk_compat = false; // active iff use_energy==true
+
+ // if true (default), produce log-filterbank, else linear
+ bool use_log_fbank = true;
+
+ // if true (default), use power in filterbank
+ // analysis, else magnitude.
+ bool use_power = true;
+
+ FbankOptions() { mel_opts.num_bins = 23; } // fbank default: 23 mel bins
+
+ std::string ToString() const {
+ std::ostringstream os;
+ os << "frame_opts: \n";
+ os << frame_opts << "\n";
+ os << "\n";
+
+ os << "mel_opts: \n";
+ os << mel_opts << "\n";
+
+ os << "use_energy: " << use_energy << "\n";
+ os << "energy_floor: " << energy_floor << "\n";
+ os << "raw_energy: " << raw_energy << "\n";
+ os << "htk_compat: " << htk_compat << "\n";
+ os << "use_log_fbank: " << use_log_fbank << "\n";
+ os << "use_power: " << use_power << "\n";
+ return os.str();
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
+
+class FbankComputer {
+ public:
+ using Options = FbankOptions;
+
+ explicit FbankComputer(const FbankOptions &opts);
+ ~FbankComputer();
+
+ int32_t Dim() const {
+ return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
+ }
+
+ // if true, compute log_energy_pre_window but after dithering and dc removal
+ bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
+
+ const FrameExtractionOptions &GetFrameOptions() const {
+ return opts_.frame_opts;
+ }
+
+ const FbankOptions &GetOptions() const { return opts_; }
+
+ /**
+ Function that computes one frame of features from
+ one frame of signal.
+
+ @param [in] signal_raw_log_energy The log-energy of the frame of the signal
+ prior to windowing and pre-emphasis, or
+ log(numeric_limits<float>::min()), whichever is greater. Must be
+ ignored by this function if this class returns false from
+ this->NeedRawLogEnergy().
+ @param [in] vtln_warp The VTLN warping factor that the user wants
+ to be applied when computing features for this utterance. Will
+ normally be 1.0, meaning no warping is to be done. The value will
+ be ignored for feature types that don't support VTLN, such as
+ spectrogram features.
+ @param [in] signal_frame One frame of the signal,
+ as extracted using the function ExtractWindow() using the options
+ returned by this->GetFrameOptions(). The function will use the
+ vector as a workspace, which is why it's a non-const pointer.
+ @param [out] feature Pointer to a vector of size this->Dim(), to which
+ the computed feature will be written. It should be pre-allocated.
+ */
+ void Compute(float signal_raw_log_energy, float vtln_warp,
+ std::vector<float> *signal_frame, float *feature);
+
+ private:
+ const MelBanks *GetMelBanks(float vtln_warp);
+
+ FbankOptions opts_;
+ float log_energy_floor_;
+ std::map<float, MelBanks *> mel_banks_; // key: VTLN coefficient; values are deleted in the destructor. NOTE(review): copying this class would double-free - confirm no caller copies
+ Rfft rfft_;
+};
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc
new file mode 100644
index 0000000..00ae4c7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-functions.cc
+
+#include "kaldi-native-fbank/csrc/feature-functions.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace knf {
+
+void ComputePowerSpectrum(std::vector<float> *complex_fft) {
+  // Input layout: [real0, realN/2, real1, im1, real2, im2, ...]; on return the
+  // first dim/2 + 1 entries hold the bin energies, the rest is unspecified.
+  const int32_t dim = complex_fft->size();
+  if (dim < 2) return;  // p[0] and p[1] below need at least two elements
+
+  float *p = complex_fft->data();
+  const int32_t half_dim = dim / 2;
+  const float first_energy = p[0] * p[0];
+  const float last_energy = p[1] * p[1];  // realN/2 is stored at index 1
+
+  for (int32_t i = 1; i < half_dim; ++i) {
+    const float real = p[i * 2];
+    const float im = p[i * 2 + 1];
+    p[i] = real * real + im * im;  // i < 2 * i, so no unread input is clobbered
+  }
+  p[0] = first_energy;
+  // Will actually never be used, and anyway should be zero if the signal has
+  // been bandlimited sensibly.
+  p[half_dim] = last_energy;
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h
new file mode 100644
index 0000000..b221622
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-functions.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
+
+#include <vector>
+namespace knf {
+
+// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
+// functions in csrc/rfft.h) and converts it, in place, into
+// a power spectrum. If the complex FFT is a vector of size n (representing
+// half of the complex FFT of a real signal of size n, as described there),
+// this function computes in the first (n/2) + 1 elements of it, the
+// energies of the fft bins from zero to the Nyquist frequency. Contents of the
+// remaining (n/2) - 1 elements are undefined at output.
+
+void ComputePowerSpectrum(std::vector<float> *complex_fft);
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc
new file mode 100644
index 0000000..dc189a6
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc
@@ -0,0 +1,247 @@
+// kaldi-native-fbank/csrc/feature-window.cc
+//
+// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+
+// This file is copied/modified from kaldi/src/feat/feature-window.cc
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#ifndef M_2PI
+#define M_2PI 6.283185307179586476925286766559005 // 2 * pi, if not already defined
+#endif
+
+namespace knf {
+
+// Streams the text produced by opts.ToString() and returns the stream.
+std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
+  return os << opts.ToString();
+}
+
+// Precomputes the opts.WindowSize() coefficients of the requested window.
+FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
+    : window_(opts.WindowSize()) {
+  const int32_t num_coeffs = opts.WindowSize();
+  KNF_CHECK_GT(num_coeffs, 0);
+
+  const std::string &type = opts.window_type;
+  float *coeffs = window_.data();
+  const double step = M_2PI / (num_coeffs - 1);  // infinite if num_coeffs == 1
+
+  for (int32_t n = 0; n < num_coeffs; n++) {
+    const double x = static_cast<double>(n);
+    if (type == "hanning") {
+      coeffs[n] = 0.5 - 0.5 * cos(step * x);
+    } else if (type == "sine") {
+      // Note that 0.5 * step == M_PI / (num_coeffs - 1), the textbook form.
+      coeffs[n] = sin(0.5 * step * x);
+    } else if (type == "hamming") {
+      coeffs[n] = 0.54 - 0.46 * cos(step * x);
+    } else if (type == "povey") {  // like hamming, but zero at the edges
+      coeffs[n] = pow(0.5 - 0.5 * cos(step * x), 0.85);
+    } else if (type == "rectangular") {
+      coeffs[n] = 1.0;
+    } else if (type == "blackman") {
+      coeffs[n] = opts.blackman_coeff - 0.5 * cos(step * x) +
+                  (0.5 - opts.blackman_coeff) * cos(2 * step * x);
+    } else {
+      KNF_LOG(FATAL) << "Invalid window type " << type;
+    }
+  }
+}
+
+// Scales wave[k] by window_[k] in place for every k in [0, window_.size()).
+void FeatureWindowFunction::Apply(float *wave) const {
+  const float *coeff = window_.data();
+  for (const float *end = coeff + window_.size(); coeff != end; ++coeff) {
+    *wave++ *= *coeff;
+  }
+}
+
+// Index of the first sample of frame `frame`. With opts.snip_edges == false
+// frames are centred, so the result can be negative for the first frames.
+int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
+  const int64_t shift = opts.WindowShift();
+  if (opts.snip_edges) return frame * shift;
+
+  // The frame is centred on shift * frame + shift / 2.
+  const int64_t center = shift * frame + shift / 2;
+  return center - opts.WindowSize() / 2;
+}
+
+// Returns how many frames can be extracted from `num_samples` samples.
+//
+//  - num_samples: number of samples in the signal seen so far.
+//  - opts:        supplies the window shift/size and the snip_edges policy.
+//  - flush:       true if no more samples will arrive; it only changes the
+//                 result when opts.snip_edges is false.
+int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
+                  bool flush /*= true*/) {
+  const int64_t shift = opts.WindowShift();
+  const int64_t length = opts.WindowSize();
+
+  if (opts.snip_edges) {
+    // HTK-like counting (--snip-edges=true, the default): every frame has to
+    // fit completely into the waveform and the first frame starts at sample
+    // zero.
+    if (num_samples < length) return 0;
+
+    // 'num_samples - length' is the room left for sliding the frame and
+    // 'shift' is the size of one slide, so their quotient (integer division
+    // rounds down) is the number of frames after the first one.
+    return 1 + (num_samples - length) / shift;
+  }
+
+  // --snip-edges=false: the count is (num_samples / shift) rounded to the
+  // nearest integer. That makes the number of frames an obvious, predictable
+  // function of frame shift and signal length, which simplifies many
+  // segmentation-related questions. Integer division truncates toward zero,
+  // so half a shift is added before dividing to round to nearest.
+  int32_t count = (num_samples + shift / 2) / shift;
+
+  if (flush) return count;
+
+  // Note: 'end' always means one past the last sample.
+  int64_t end_of_last_frame = FirstSampleOfFrame(count - 1, opts) + length;
+
+  // With flush == false we cannot output frames that extend past the end of
+  // the signal, so trailing frames are dropped until the last one fits.
+  // (Written for clarity rather than efficiency.)
+  while (count > 0 && end_of_last_frame > num_samples) {
+    --count;
+    end_of_last_frame -= shift;
+  }
+  return count;
+}
+
+// Extracts frame `f` into *window (resized to opts.PaddedWindowSize()) and runs
+// ProcessWindow() on it. `wave` holds the samples starting at absolute index
+// `sample_offset`. Samples the frame needs from outside `wave` are obtained
+// by reflection; the padding after the frame is zero-filled.
+void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
+                   int32_t f, const FrameExtractionOptions &opts,
+                   const FeatureWindowFunction &window_function,
+                   std::vector<float> *window,
+                   float *log_energy_pre_window /*= nullptr*/) {
+  KNF_CHECK(sample_offset >= 0 && wave.size() != 0);
+
+  const int32_t frame_length = opts.WindowSize();
+  const int32_t frame_length_padded = opts.PaddedWindowSize();
+  const int32_t wave_dim = wave.size();
+
+  const int64_t num_samples = sample_offset + wave_dim;
+  const int64_t start_sample = FirstSampleOfFrame(f, opts);
+  const int64_t end_sample = start_sample + frame_length;
+
+  if (opts.snip_edges) {
+    KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
+  } else {
+    KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
+  }
+
+  if (static_cast<int32_t>(window->size()) != frame_length_padded) {
+    window->resize(frame_length_padded);
+  }
+
+  // Index in 'wave' of the first sample of the frame; it is negative when
+  // the frame starts before the first sample of 'wave'.
+  const int32_t wave_start = static_cast<int32_t>(start_sample - sample_offset);
+
+  if (wave_start >= 0 && wave_start + frame_length <= wave_dim) {
+    // The normal case: no edge effects to consider.
+    std::copy(wave.begin() + wave_start,
+              wave.begin() + wave_start + frame_length, window->data());
+  } else {
+    // Deal with edge effects by reflecting around the beginning or the end of
+    // the wave, e.g. -1 -> 0, -2 -> 1, dim -> dim - 1, dim + 1 -> dim - 2.
+    // Repeated reflection is supported, but only pathological cases need it.
+    for (int32_t s = 0; s < frame_length; ++s) {
+      int32_t s_in_wave = s + wave_start;
+      while (s_in_wave < 0 || s_in_wave >= wave_dim) {
+        s_in_wave = (s_in_wave < 0) ? -s_in_wave - 1
+                                    : 2 * wave_dim - 1 - s_in_wave;
+      }
+      (*window)[s] = wave[s_in_wave];
+    }
+  }
+
+  // Zero the padding after the frame: resize() keeps whatever a reused buffer
+  // already held there, and the padded samples are expected to be zero.
+  std::fill(window->begin() + frame_length, window->end(), 0.0f);
+
+  ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
+}
+
+// Subtracts the arithmetic mean of d[0..n) from every element, in place.
+static void RemoveDcOffset(float *d, int32_t n) {
+  float total = 0;
+  for (const float *it = d; it != d + n; ++it) {
+    total += *it;
+  }
+
+  const float mean = total / n;
+  for (float *it = d; it != d + n; ++it) {
+    *it -= mean;
+  }
+}
+
+// Dot product of the n-element arrays `a` and `b`, accumulated in a float in
+// index order (element 0 first).
+float InnerProduct(const float *a, const float *b, int32_t n) {
+  float acc = 0;
+  for (const float *end = a + n; a != end; ++a, ++b) acc += *a * *b;
+  return acc;
+}
+
+// In-place pre-emphasis d[i] -= coeff * d[i - 1]; d[0] is its own predecessor.
+static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
+  if (preemph_coeff == 0.0) return;  // nothing to do
+  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
+
+  // Walk backwards so that every d[i - 1] read is still the original value.
+  int32_t i = n;
+  while (--i > 0) {
+    d[i] -= preemph_coeff * d[i - 1];
+  }
+  d[0] -= preemph_coeff * d[0];
+}
+
+void ProcessWindow(const FrameExtractionOptions &opts,
+ const FeatureWindowFunction &window_function, float *window,
+ float *log_energy_pre_window /*= nullptr*/) {
+ int32_t frame_length = opts.WindowSize();
+ // Applies, in order: DC-offset removal, raw log-energy, pre-emphasis, window.
+ // NOTE: dithering is not implemented. The branch below is an empty
+ // placeholder, so a non-zero opts.dither is silently ignored (a
+ // KNF_CHECK_EQ(opts.dither, 0) guard that once stood here is disabled).
+ // Kaldi's implementation of the dither step, for reference:
+ // https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.cc
+ if (opts.dither!=0) {
+ // Disabled Kaldi snippet, kept as a pointer for a future port:
+ // kaldi::RandomState rstate; rstate.seed = 0;
+ // for (int32 i = 0; i < frame_length; i++)
+ // window[i] += RandGauss(&rstate) * opts.dither;
+ }
+
+ if (opts.remove_dc_offset) {
+ RemoveDcOffset(window, frame_length);
+ }
+ // Raw log-energy, floored at float epsilon so that log() stays finite.
+ if (log_energy_pre_window != NULL) {
+ float energy = std::max<float>(InnerProduct(window, window, frame_length),
+ std::numeric_limits<float>::epsilon());
+ *log_energy_pre_window = std::log(energy);
+ }
+
+ if (opts.preemph_coeff != 0.0) {
+ Preemphasize(window, frame_length, opts.preemph_coeff);
+ }
+
+ window_function.Apply(window);
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h
new file mode 100644
index 0000000..32e8e2a
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h
@@ -0,0 +1,178 @@
+// kaldi-native-fbank/csrc/feature-window.h
+//
+// Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+
+// This file is copied/modified from kaldi/src/feat/feature-window.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+// Returns the smallest power of two that is >= n; n must be positive.
+// (Logic copied from kaldi/src/base/kaldi-math.cc.)
+inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
+  KNF_CHECK_GT(n, 0);
+  int32_t v = n - 1;
+  // OR-ing in shifts of 1, 2, 4, 8, 16 sets every bit below the highest set one.
+  for (int32_t shift = 1; shift <= 16; shift *= 2) {
+    v |= v >> shift;
+  }
+  return v + 1;
+}
+
+struct FrameExtractionOptions {
+ float samp_freq = 16000; // Sampling rate, in samples per second.
+ float frame_shift_ms = 10.0f; // in milliseconds.
+ float frame_length_ms = 25.0f; // in milliseconds.
+ float dither = 1.0f; // Dither amount, 0.0 = none. NOTE: ProcessWindow() currently ignores it.
+ float preemph_coeff = 0.97f; // Preemphasis coefficient; 0 disables preemphasis.
+ bool remove_dc_offset = true; // Subtract mean of wave before FFT.
+ std::string window_type = "povey"; // Window shape, see the list below.
+ // May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman".
+ // "povey" is a window its author made to be similar to Hamming but to go to
+ // zero at the edges: pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85). The author did not
+ // think the Hamming window makes sense as a windowing function.
+ bool round_to_power_of_two = true; // Pad the window to a power-of-two size.
+ float blackman_coeff = 0.42f; // Only used by the "blackman" window.
+ bool snip_edges = true; // If true, every frame must fit inside the signal.
+ // bool allow_downsample = false;
+ // bool allow_upsample = false;
+
+ // Used for streaming feature extraction. It indicates the number
+ // of feature frames to keep in the recycling vector. -1 means to
+ // keep all feature frames.
+ int32_t max_feature_vectors = -1;
+ // Frame shift, frame length and padded frame length, all in samples.
+ int32_t WindowShift() const {
+ return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
+ }
+ int32_t WindowSize() const {
+ return static_cast<int32_t>(samp_freq * 0.001f * frame_length_ms);
+ }
+ int32_t PaddedWindowSize() const {
+ return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize())
+ : WindowSize());
+ }
+ std::string ToString() const {
+ std::ostringstream os;
+#define KNF_PRINT(x) os << #x << ": " << x << "\n"
+ KNF_PRINT(samp_freq);
+ KNF_PRINT(frame_shift_ms);
+ KNF_PRINT(frame_length_ms);
+ KNF_PRINT(dither);
+ KNF_PRINT(preemph_coeff);
+ KNF_PRINT(remove_dc_offset);
+ KNF_PRINT(window_type);
+ KNF_PRINT(round_to_power_of_two);
+ KNF_PRINT(blackman_coeff);
+ KNF_PRINT(snip_edges);
+ // KNF_PRINT(allow_downsample);
+ // KNF_PRINT(allow_upsample);
+ KNF_PRINT(max_feature_vectors);
+#undef KNF_PRINT
+ return os.str();
+ }
+};
+
+std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);
+
+class FeatureWindowFunction {
+ public:
+ FeatureWindowFunction() = default; // empty window: Apply() is then a no-op
+ explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
+ /**
+ * @param wave Pointer to a 1-D array of shape [window_size].
+ * It is modified in-place: wave[i] = wave[i] * window_[i].
+ * window_size is the opts.WindowSize() given to the constructor.
+ */
+ void Apply(float *wave) const;
+
+ private:
+ std::vector<float> window_; // of size opts.WindowSize()
+};
+
+int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);
+
+/**
+ This function returns the number of frames that we can extract from a wave
+ file with the given number of samples in it (assumed to have the same
+ sampling rate as specified in 'opts').
+
+ @param [in] num_samples The number of samples in the wave file.
+ @param [in] opts The frame-extraction options class
+
+ @param [in] flush True if we are asserting that this number of samples
+ is 'all there is', false if we are expecting more data to possibly come in.
+ This only makes a difference to the answer
+ if opts.snip_edges == false. For offline feature extraction you always want
+ flush == true. In an online-decoding context, once you know (or decide) that
+ no more data is coming in, you'd call it with flush == true at the end to
+ flush out any remaining data.
+*/
+int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
+ bool flush = true);
+
+/*
+ ExtractWindow() extracts a windowed frame of waveform (possibly with a
+ power-of-two, padded size, depending on the config), including all the
+ processing done by ProcessWindow().
+
+ @param [in] sample_offset If 'wave' is not the entire waveform, but
+ part of it to the left has been discarded, then the
+ number of samples prior to 'wave' that we have
+ already discarded. Set this to zero if you are
+ processing the entire waveform in one piece, or
+ if you get 'no matching function' compilation
+ errors when updating the code.
+ @param [in] wave The waveform
+ @param [in] f The frame index to be extracted, with
+ 0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
+ @param [in] opts The options class to be used
+ @param [in] window_function The windowing function, as derived from the
+ options class.
+ @param [out] window The windowed, possibly-padded waveform to be
+ extracted. Will be resized as needed.
+ @param [out] log_energy_pre_window If non-NULL, the log-energy of
+ the signal prior to pre-emphasis and multiplying by
+ the windowing function will be written to here.
+*/
+void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
+ int32_t f, const FrameExtractionOptions &opts,
+ const FeatureWindowFunction &window_function,
+ std::vector<float> *window,
+ float *log_energy_pre_window = nullptr);
+
+/**
+ This function does all the windowing steps after actually
+ extracting the windowed signal: depending on the configuration, it
+ does dc offset removal, preemphasis, and multiplication by the windowing
+ function. (Dithering is not implemented here: opts.dither is ignored.)
+ @param [in] opts The options class to be used
+ @param [in] window_function The windowing function-- should have
+ been initialized using 'opts'.
+ @param [in,out] window A vector of size opts.WindowSize(). Note:
+ it will typically be a sub-vector of a larger vector of size
+ opts.PaddedWindowSize(), with the remaining samples zero,
+ as the FFT code is more efficient if it operates on data with
+ power-of-two size.
+ @param [out] log_energy_pre_window If non-NULL, then after dithering and
+ DC offset removal, this function will write to this pointer the log of
+ the total energy (i.e. sum-squared) of the frame.
+ */
+void ProcessWindow(const FrameExtractionOptions &opts,
+ const FeatureWindowFunction &window_function, float *window,
+ float *log_energy_pre_window = nullptr);
+
+// Compute the inner product of two vectors
+float InnerProduct(const float *a, const float *b, int32_t n);
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c
new file mode 100644
index 0000000..40242c0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c
@@ -0,0 +1,2968 @@
+/* This file is copied from
+ * https://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ */
+/*
+Fast Fourier/Cosine/Sine Transform
+ dimension :one
+ data length :power of 2
+ decimation :frequency
+ radix :split-radix
+ data :inplace
+ table :use
+functions
+ cdft: Complex Discrete Fourier Transform
+ rdft: Real Discrete Fourier Transform
+ ddct: Discrete Cosine Transform
+ ddst: Discrete Sine Transform
+ dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+ dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+ void cdft(int, int, double *, int *, double *);
+ void rdft(int, int, double *, int *, double *);
+ void ddct(int, int, double *, int *, double *);
+ void ddst(int, int, double *, int *, double *);
+ void dfct(int, double *, double *, int *, double *);
+ void dfst(int, double *, double *, int *, double *);
+macro definitions
+ USE_CDFT_PTHREADS : default=not defined
+ CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
+ CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
+ USE_CDFT_WINTHREADS : default=not defined
+ CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
+ CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+ [definition]
+ <case1>
+ X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+ <case2>
+ X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+ (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ cdft(2*n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ cdft(2*n, -1, a, ip, w);
+ [parameters]
+ 2*n :data length (int)
+ n >= 1, n = power of 2
+ a[0...2*n-1] :input/output data (double *)
+ input data
+ a[2*j] = Re(x[j]),
+ a[2*j+1] = Im(x[j]), 0<=j<n
+ output data
+ a[2*k] = Re(X[k]),
+ a[2*k+1] = Im(X[k]), 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ cdft(2*n, -1, a, ip, w);
+ is
+ cdft(2*n, 1, a, ip, w);
+ for (j = 0; j <= 2 * n - 1; j++) {
+ a[j] *= 1.0 / n;
+ }
+ .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+ [definition]
+ <case1> RDFT
+ R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+ I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+ <case2> IRDFT (excluding scale)
+ a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+ sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+ sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ rdft(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ rdft(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (double *)
+ <case1>
+ output data
+ a[2*k] = R[k], 0<=k<n/2
+ a[2*k+1] = I[k], 0<k<n/2
+ a[1] = R[n/2]
+ <case2>
+ input data
+ a[2*j] = R[j], 0<=j<n/2
+ a[2*j+1] = I[j], 0<j<n/2
+ a[1] = R[n/2]
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n/2-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ rdft(n, 1, a, ip, w);
+ is
+ rdft(n, -1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+ [definition]
+ <case1> IDCT (excluding scale)
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DCT
+ C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddct(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddct(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (double *)
+ output data
+ a[k] = C[k], 0<=k<n
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddct(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddct(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+ [definition]
+ <case1> IDST (excluding scale)
+ S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+ <case2> DST
+ S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+ [usage]
+ <case1>
+ ip[0] = 0; // first time only
+ ddst(n, 1, a, ip, w);
+ <case2>
+ ip[0] = 0; // first time only
+ ddst(n, -1, a, ip, w);
+ [parameters]
+ n :data length (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (double *)
+ <case1>
+ input data
+ a[j] = A[j], 0<j<n
+ a[0] = A[n]
+ output data
+ a[k] = S[k], 0<=k<n
+ <case2>
+ output data
+ a[k] = S[k], 0<k<n
+ a[0] = S[n]
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/2)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/4-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ ddst(n, -1, a, ip, w);
+ is
+ a[0] *= 0.5;
+ ddst(n, 1, a, ip, w);
+ for (j = 0; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+ [definition]
+ C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+ [usage]
+ ip[0] = 0; // first time only
+ dfct(n, a, t, ip, w);
+ [parameters]
+ n :data length - 1 (int)
+ n >= 2, n = power of 2
+ a[0...n] :input/output data (double *)
+ output data
+ a[k] = C[k], 0<=k<=n
+ t[0...n/2] :work area (double *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ is
+ a[0] *= 0.5;
+ a[n] *= 0.5;
+ dfct(n, a, t, ip, w);
+ for (j = 0; j <= n; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+ [definition]
+ S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+ [usage]
+ ip[0] = 0; // first time only
+ dfst(n, a, t, ip, w);
+ [parameters]
+ n :data length + 1 (int)
+ n >= 2, n = power of 2
+ a[0...n-1] :input/output data (double *)
+ output data
+ a[k] = S[k], 0<k<n
+ (a[0] is used for work area)
+ t[0...n/2-1] :work area (double *)
+ ip[0...*] :work area for bit reversal (int *)
+ length of ip >= 2+sqrt(n/4)
+ strictly,
+ length of ip >=
+ 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+ ip[0],ip[1] are pointers of the cos/sin table.
+ w[0...n*5/8-1] :cos/sin table (double *)
+ w[],ip[] are initialized if ip[0] == 0.
+ [remark]
+ Inverse of
+ dfst(n, a, t, ip, w);
+ is
+ dfst(n, a, t, ip, w);
+ for (j = 1; j <= n - 1; j++) {
+ a[j] *= 2.0 / n;
+ }
+ .
+
+
+Appendix :
+ The cos/sin table is recalculated when a larger table is required.
+ w[] and ip[] are compatible with all routines.
+*/
+
+
+/* Real DFT of a[0..n-1] in place (isgn >= 0) or its unscaled inverse (isgn < 0). */
+void rdft(int n, int isgn, double *a, int *ip, double *w)
+{
+ void makewt(int nw, int *ip, double *w);
+ void makect(int nc, int *ip, double *c);
+ void cftfsub(int n, double *a, int *ip, int nw, double *w);
+ void cftbsub(int n, double *a, int *ip, int nw, double *w);
+ void rftfsub(int n, double *a, int nc, double *c);
+ void rftbsub(int n, double *a, int nc, double *c);
+ int nw, nc;
+ double xi;
+ /* ip[0]/ip[1] cache the table sizes; a table is rebuilt when too small for n */
+ nw = ip[0];
+ if (n > (nw << 2)) {
+ nw = n >> 2;
+ makewt(nw, ip, w);
+ }
+ nc = ip[1];
+ if (n > (nc << 2)) {
+ nc = n >> 2;
+ makect(nc, ip, w + nw); /* the second table is stored right after the first */
+ }
+ if (isgn >= 0) {
+ if (n > 4) {
+ cftfsub(n, a, ip, nw, w);
+ rftfsub(n, a, nc, w + nw);
+ } else if (n == 4) {
+ cftfsub(n, a, ip, nw, w);
+ }
+ xi = a[0] - a[1]; /* output packing: a[0] = R[0], a[1] = R[n/2] */
+ a[0] += a[1];
+ a[1] = xi;
+ } else {
+ a[1] = 0.5 * (a[0] - a[1]); /* undo the R[0] / R[n/2] packing first */
+ a[0] -= a[1];
+ if (n > 4) {
+ rftbsub(n, a, nc, w + nw);
+ cftbsub(n, a, ip, nw, w);
+ } else if (n == 4) {
+ cftbsub(n, a, ip, nw, w);
+ }
+ }
+}
+
+
+/* -------- initializing routines -------- */
+
+
+#include <math.h>
+/* Builds the cos/sin table w[] for table size nw; stores nw in ip[0] and resets ip[1] to 1. */
+void makewt(int nw, int *ip, double *w)
+{
+ void makeipt(int nw, int *ip);
+ int j, nwh, nw0, nw1;
+ double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
+
+ ip[0] = nw;
+ ip[1] = 1;
+ if (nw > 2) {
+ nwh = nw >> 1;
+ delta = atan(1.0) / nwh; /* atan(1.0) == pi/4 */
+ wn4r = cos(delta * nwh); /* i.e. cos(pi/4) */
+ w[0] = 1;
+ w[1] = wn4r;
+ if (nwh == 4) {
+ w[2] = cos(delta * 2);
+ w[3] = sin(delta * 2);
+ } else if (nwh > 4) {
+ makeipt(nw, ip);
+ w[2] = 0.5 / cos(delta * 2);
+ w[3] = 0.5 / cos(delta * 6);
+ for (j = 4; j < nwh; j += 4) {
+ w[j] = cos(delta * j);
+ w[j + 1] = sin(delta * j);
+ w[j + 2] = cos(3 * delta * j);
+ w[j + 3] = -sin(3 * delta * j);
+ }
+ }
+ nw0 = 0;
+ while (nwh > 2) { /* append sub-tables of halving size, each taken from the previous one */
+ nw1 = nw0 + nwh;
+ nwh >>= 1;
+ w[nw1] = 1;
+ w[nw1 + 1] = wn4r;
+ if (nwh == 4) {
+ wk1r = w[nw0 + 4];
+ wk1i = w[nw0 + 5];
+ w[nw1 + 2] = wk1r;
+ w[nw1 + 3] = wk1i;
+ } else if (nwh > 4) {
+ wk1r = w[nw0 + 4];
+ wk3r = w[nw0 + 6];
+ w[nw1 + 2] = 0.5 / wk1r;
+ w[nw1 + 3] = 0.5 / wk3r;
+ for (j = 4; j < nwh; j += 4) {
+ wk1r = w[nw0 + 2 * j];
+ wk1i = w[nw0 + 2 * j + 1];
+ wk3r = w[nw0 + 2 * j + 2];
+ wk3i = w[nw0 + 2 * j + 3];
+ w[nw1 + j] = wk1r;
+ w[nw1 + j + 1] = wk1i;
+ w[nw1 + j + 2] = wk3r;
+ w[nw1 + j + 3] = wk3i;
+ }
+ }
+ nw0 = nw1;
+ }
+ }
+}
+
+/* Fills the bit-reversal work area ip[2...]; ip[0] and ip[1] are left untouched. */
+void makeipt(int nw, int *ip)
+{
+ int j, l, m, m2, p, q;
+
+ ip[2] = 0;
+ ip[3] = 16;
+ m = 2;
+ for (l = nw; l > 32; l >>= 2) { /* each pass doubles the number of entries */
+ m2 = m << 1;
+ q = m2 << 3;
+ for (j = m; j < m2; j++) {
+ p = ip[j] << 2;
+ ip[m + j] = p;
+ ip[m2 + j] = p + q;
+ }
+ m = m2;
+ }
+}
+
+/* Builds the cos/sin table c[0..nc-1] when nc > 1 and always stores nc in ip[1]. */
+void makect(int nc, int *ip, double *c)
+{
+ int j, nch;
+ double delta;
+
+ ip[1] = nc;
+ if (nc > 1) {
+ nch = nc >> 1;
+ delta = atan(1.0) / nch; /* atan(1.0) == pi/4 */
+ c[0] = cos(delta * nch);
+ c[nch] = 0.5 * c[0];
+ for (j = 1; j < nch; j++) { /* cosines fill the front half, sines the back half */
+ c[j] = 0.5 * cos(delta * j);
+ c[nc - j] = 0.5 * sin(delta * j);
+ }
+ }
+}
+
+
+/* -------- child routines -------- */
+
+/* Optional pthreads backend, enabled by defining USE_CDFT_PTHREADS; a thread error prints a message and calls exit(1). */
+#ifdef USE_CDFT_PTHREADS
+#define USE_CDFT_THREADS
+#ifndef CDFT_THREADS_BEGIN_N
+#define CDFT_THREADS_BEGIN_N 8192
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 65536
+#endif
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t pthread_t
+#define cdft_thread_create(thp,func,argp) { \
+ if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
+ fprintf(stderr, "cdft thread error\n"); \
+ exit(1); \
+ } \
+}
+#define cdft_thread_wait(th) { \
+ if (pthread_join(th, NULL) != 0) { \
+ fprintf(stderr, "cdft thread error\n"); \
+ exit(1); \
+ } \
+}
+#endif /* USE_CDFT_PTHREADS */
+
+/* Optional Windows-threads backend, enabled by defining USE_CDFT_WINTHREADS; a failed CreateThread prints a message and calls exit(1). */
+#ifdef USE_CDFT_WINTHREADS
+#define USE_CDFT_THREADS
+#ifndef CDFT_THREADS_BEGIN_N
+#define CDFT_THREADS_BEGIN_N 32768
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 524288
+#endif
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t HANDLE
+#define cdft_thread_create(thp,func,argp) { \
+ DWORD thid; \
+ *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
+ if (*(thp) == 0) { \
+ fprintf(stderr, "cdft thread error\n"); \
+ exit(1); \
+ } \
+}
+#define cdft_thread_wait(th) { \
+ WaitForSingleObject(th, INFINITE); \
+ CloseHandle(th); \
+}
+#endif /* USE_CDFT_WINTHREADS */
+
+/* Complex-FFT stage rdft() uses when isgn >= 0: dispatches on n to a size-specific kernel and, for n > 8, a bitrv* pass. */
+void cftfsub(int n, double *a, int *ip, int nw, double *w)
+{
+ void bitrv2(int n, int *ip, double *a);
+ void bitrv216(double *a);
+ void bitrv208(double *a);
+ void cftf1st(int n, double *a, double *w);
+ void cftrec4(int n, double *a, int nw, double *w);
+ void cftleaf(int n, int isplt, double *a, int nw, double *w);
+ void cftfx41(int n, double *a, int nw, double *w);
+ void cftf161(double *a, double *w);
+ void cftf081(double *a, double *w);
+ void cftf040(double *a);
+ void cftx020(double *a);
+#ifdef USE_CDFT_THREADS
+ void cftrec4_th(int n, double *a, int nw, double *w);
+#endif /* USE_CDFT_THREADS */
+
+ if (n > 8) {
+ if (n > 32) {
+ cftf1st(n, a, &w[nw - (n >> 2)]);
+#ifdef USE_CDFT_THREADS
+ if (n > CDFT_THREADS_BEGIN_N) {
+ cftrec4_th(n, a, nw, w);
+ } else
+#endif /* USE_CDFT_THREADS */
+ if (n > 512) {
+ cftrec4(n, a, nw, w);
+ } else if (n > 128) {
+ cftleaf(n, 1, a, nw, w);
+ } else {
+ cftfx41(n, a, nw, w);
+ }
+ bitrv2(n, ip, a);
+ } else if (n == 32) {
+ cftf161(a, &w[nw - 8]);
+ bitrv216(a);
+ } else {
+ cftf081(a, w);
+ bitrv208(a);
+ }
+ } else if (n == 8) {
+ cftf040(a);
+ } else if (n == 4) {
+ cftx020(a);
+ }
+}
+
+/* Complex-FFT stage rdft() uses when isgn < 0: same size dispatch as cftfsub(), with the cftb* / bitrv*conj / bitrv*neg variants. */
+void cftbsub(int n, double *a, int *ip, int nw, double *w)
+{
+ void bitrv2conj(int n, int *ip, double *a);
+ void bitrv216neg(double *a);
+ void bitrv208neg(double *a);
+ void cftb1st(int n, double *a, double *w);
+ void cftrec4(int n, double *a, int nw, double *w);
+ void cftleaf(int n, int isplt, double *a, int nw, double *w);
+ void cftfx41(int n, double *a, int nw, double *w);
+ void cftf161(double *a, double *w);
+ void cftf081(double *a, double *w);
+ void cftb040(double *a);
+ void cftx020(double *a);
+#ifdef USE_CDFT_THREADS
+ void cftrec4_th(int n, double *a, int nw, double *w);
+#endif /* USE_CDFT_THREADS */
+
+ if (n > 8) {
+ if (n > 32) {
+ cftb1st(n, a, &w[nw - (n >> 2)]);
+#ifdef USE_CDFT_THREADS
+ if (n > CDFT_THREADS_BEGIN_N) {
+ cftrec4_th(n, a, nw, w);
+ } else
+#endif /* USE_CDFT_THREADS */
+ if (n > 512) {
+ cftrec4(n, a, nw, w);
+ } else if (n > 128) {
+ cftleaf(n, 1, a, nw, w);
+ } else {
+ cftfx41(n, a, nw, w);
+ }
+ bitrv2conj(n, ip, a);
+ } else if (n == 32) {
+ cftf161(a, &w[nw - 8]);
+ bitrv216neg(a);
+ } else {
+ cftf081(a, w);
+ bitrv208neg(a);
+ }
+ } else if (n == 8) {
+ cftb040(a);
+ } else if (n == 4) {
+ cftx020(a);
+ }
+}
+
+
+/* Exchange the two complex values (re, im pairs) stored at a[p] and a[q]. */
+static void bitrv2_swap(double *a, int p, int q)
+{
+    double pr = a[p], pi = a[p + 1];
+    double qr = a[q], qi = a[q + 1];
+
+    a[p] = qr;
+    a[p + 1] = qi;
+    a[q] = pr;
+    a[q + 1] = pi;
+}
+
+
+/*
+ * In-place reordering of the interleaved complex array a[0..n-1].
+ *
+ * m is doubled once for every factor of 4 that can be shifted out of n/4
+ * while the remainder stays above 8; the remainder (8 or not) selects one
+ * of two swap schedules.  ip[m .. 2m-1] supplies the per-row base offsets
+ * (doubled in the remainder-8 schedule).  Every step below exchanges one
+ * complex value at p with one at q.
+ */
+void bitrv2(int n, int *ip, double *a)
+{
+    int row, col, p, q, rem, m, nh, nm;
+
+    m = 1;
+    for (rem = n >> 2; rem > 8; rem >>= 2) {
+        m <<= 1;
+    }
+    nh = n >> 1;
+    nm = 4 * m;
+    if (rem == 8) {
+        for (row = 0; row < m; row++) {
+            /* Off-diagonal pairs (col < row): 16 exchanges each. */
+            for (col = 0; col < row; col++) {
+                p = 4 * col + 2 * ip[m + row];
+                q = 4 * row + 2 * ip[m + col];
+                bitrv2_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2_swap(a, p, q);
+                p += nm;  q -= nm;      bitrv2_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2_swap(a, p, q);
+                p += nh;  q += 2;       bitrv2_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2_swap(a, p, q);
+                p -= nm;  q += nm;      bitrv2_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2_swap(a, p, q);
+                p += 2;   q += nh;      bitrv2_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2_swap(a, p, q);
+                p += nm;  q -= nm;      bitrv2_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2_swap(a, p, q);
+                p -= nh;  q -= 2;       bitrv2_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2_swap(a, p, q);
+                p -= nm;  q += nm;      bitrv2_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2_swap(a, p, q);
+            }
+            /* Diagonal entry (col == row): 6 exchanges. */
+            q = 4 * row + 2 * ip[m + row];
+            p = q + 2;
+            q += nh;
+            bitrv2_swap(a, p, q);
+            p += nm;       q += 2 * nm;      bitrv2_swap(a, p, q);
+            p += nm;       q -= nm;          bitrv2_swap(a, p, q);
+            p -= 2;        q -= nh;          bitrv2_swap(a, p, q);
+            p += nh + 2;   q += nh + 2;      bitrv2_swap(a, p, q);
+            p -= nh - nm;  q += 2 * nm - 2;  bitrv2_swap(a, p, q);
+        }
+    } else {
+        for (row = 0; row < m; row++) {
+            /* Off-diagonal pairs (col < row): 8 exchanges each. */
+            for (col = 0; col < row; col++) {
+                p = 4 * col + ip[m + row];
+                q = 4 * row + ip[m + col];
+                bitrv2_swap(a, p, q);
+                p += nm;  q += nm;  bitrv2_swap(a, p, q);
+                p += nh;  q += 2;   bitrv2_swap(a, p, q);
+                p -= nm;  q -= nm;  bitrv2_swap(a, p, q);
+                p += 2;   q += nh;  bitrv2_swap(a, p, q);
+                p += nm;  q += nm;  bitrv2_swap(a, p, q);
+                p -= nh;  q -= 2;   bitrv2_swap(a, p, q);
+                p -= nm;  q -= nm;  bitrv2_swap(a, p, q);
+            }
+            /* Diagonal entry (col == row): 2 exchanges. */
+            q = 4 * row + ip[m + row];
+            p = q + 2;
+            q += nh;
+            bitrv2_swap(a, p, q);
+            p += nm;  q += nm;  bitrv2_swap(a, p, q);
+        }
+    }
+}
+
+
+/*
+ * Exchange the complex values at a[p] and a[q], negating the imaginary
+ * part of each one as it is moved.
+ */
+static void bitrv2conj_swap(double *a, int p, int q)
+{
+    double pr = a[p], pi = -a[p + 1];
+    double qr = a[q], qi = -a[q + 1];
+
+    a[p] = qr;
+    a[p + 1] = qi;
+    a[q] = pr;
+    a[q + 1] = pi;
+}
+
+
+/*
+ * Same swap schedule as bitrv2, but every exchanged value has its
+ * imaginary part negated, and on the diagonal entries the imaginary parts
+ * at a[p - 1] and a[q + 3] (elements that are not exchanged) are negated
+ * in place.
+ *
+ * m, the remainder test and the use of ip[m .. 2m-1] are identical to
+ * bitrv2.
+ */
+void bitrv2conj(int n, int *ip, double *a)
+{
+    int row, col, p, q, rem, m, nh, nm;
+
+    m = 1;
+    for (rem = n >> 2; rem > 8; rem >>= 2) {
+        m <<= 1;
+    }
+    nh = n >> 1;
+    nm = 4 * m;
+    if (rem == 8) {
+        for (row = 0; row < m; row++) {
+            /* Off-diagonal pairs (col < row): 16 conjugating exchanges. */
+            for (col = 0; col < row; col++) {
+                p = 4 * col + 2 * ip[m + row];
+                q = 4 * row + 2 * ip[m + col];
+                bitrv2conj_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2conj_swap(a, p, q);
+                p += nm;  q -= nm;      bitrv2conj_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2conj_swap(a, p, q);
+                p += nh;  q += 2;       bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2conj_swap(a, p, q);
+                p -= nm;  q += nm;      bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2conj_swap(a, p, q);
+                p += 2;   q += nh;      bitrv2conj_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2conj_swap(a, p, q);
+                p += nm;  q -= nm;      bitrv2conj_swap(a, p, q);
+                p += nm;  q += 2 * nm;  bitrv2conj_swap(a, p, q);
+                p -= nh;  q -= 2;       bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2conj_swap(a, p, q);
+                p -= nm;  q += nm;      bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= 2 * nm;  bitrv2conj_swap(a, p, q);
+            }
+            /* Diagonal entry: 6 exchanges, first and last with sign flips. */
+            q = 4 * row + 2 * ip[m + row];
+            p = q + 2;
+            q += nh;
+            a[p - 1] = -a[p - 1];
+            bitrv2conj_swap(a, p, q);
+            a[q + 3] = -a[q + 3];
+            p += nm;       q += 2 * nm;      bitrv2conj_swap(a, p, q);
+            p += nm;       q -= nm;          bitrv2conj_swap(a, p, q);
+            p -= 2;        q -= nh;          bitrv2conj_swap(a, p, q);
+            p += nh + 2;   q += nh + 2;      bitrv2conj_swap(a, p, q);
+            p -= nh - nm;
+            q += 2 * nm - 2;
+            a[p - 1] = -a[p - 1];
+            bitrv2conj_swap(a, p, q);
+            a[q + 3] = -a[q + 3];
+        }
+    } else {
+        for (row = 0; row < m; row++) {
+            /* Off-diagonal pairs (col < row): 8 conjugating exchanges. */
+            for (col = 0; col < row; col++) {
+                p = 4 * col + ip[m + row];
+                q = 4 * row + ip[m + col];
+                bitrv2conj_swap(a, p, q);
+                p += nm;  q += nm;  bitrv2conj_swap(a, p, q);
+                p += nh;  q += 2;   bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= nm;  bitrv2conj_swap(a, p, q);
+                p += 2;   q += nh;  bitrv2conj_swap(a, p, q);
+                p += nm;  q += nm;  bitrv2conj_swap(a, p, q);
+                p -= nh;  q -= 2;   bitrv2conj_swap(a, p, q);
+                p -= nm;  q -= nm;  bitrv2conj_swap(a, p, q);
+            }
+            /* Diagonal entry: 2 exchanges, each bracketed by sign flips. */
+            q = 4 * row + ip[m + row];
+            p = q + 2;
+            q += nh;
+            a[p - 1] = -a[p - 1];
+            bitrv2conj_swap(a, p, q);
+            a[q + 3] = -a[q + 3];
+            p += nm;
+            q += nm;
+            a[p - 1] = -a[p - 1];
+            bitrv2conj_swap(a, p, q);
+            a[q + 3] = -a[q + 3];
+        }
+    }
+}
+
+
+/*
+ * Fixed reordering of 16 interleaved complex values in a[0..31].
+ * Six pairs of complex slots trade places; the slots at double offsets
+ * 0, 12, 18 and 30 are left where they are.
+ */
+void bitrv216(double *a)
+{
+    /* Double offsets of the real parts of each exchanged pair. */
+    static const int pairs[6][2] = {
+        {2, 16}, {4, 8}, {6, 24}, {10, 20}, {14, 28}, {22, 26}
+    };
+    double re, im;
+    int i, p, q;
+
+    for (i = 0; i < 6; i++) {
+        p = pairs[i][0];
+        q = pairs[i][1];
+        re = a[p];
+        im = a[p + 1];
+        a[p] = a[q];
+        a[p + 1] = a[q + 1];
+        a[q] = re;
+        a[q + 1] = im;
+    }
+}
+
+
+/*
+ * Fixed reordering of 16 interleaved complex values in a[0..31].
+ * Complex slot i (1..15) receives the value previously held in slot
+ * src[i]; slot 0 is not touched.
+ */
+void bitrv216neg(double *a)
+{
+    static const int src[16] = {
+        0, 15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8
+    };
+    double old[32];
+    int i;
+
+    /* Snapshot everything that can be read before overwriting a[]. */
+    for (i = 2; i < 32; i++) {
+        old[i] = a[i];
+    }
+    for (i = 1; i < 16; i++) {
+        a[2 * i] = old[2 * src[i]];
+        a[2 * i + 1] = old[2 * src[i] + 1];
+    }
+}
+
+
+/*
+ * Fixed reordering of 8 interleaved complex values in a[0..15]:
+ * the complex slots at double offsets 2 and 8 trade places, as do the
+ * ones at 6 and 12.  All other slots are left alone.
+ */
+void bitrv208(double *a)
+{
+    double re, im;
+
+    /* a[2..3] <-> a[8..9] */
+    re = a[2];
+    im = a[3];
+    a[2] = a[8];
+    a[3] = a[9];
+    a[8] = re;
+    a[9] = im;
+
+    /* a[6..7] <-> a[12..13] */
+    re = a[6];
+    im = a[7];
+    a[6] = a[12];
+    a[7] = a[13];
+    a[12] = re;
+    a[13] = im;
+}
+
+
+/*
+ * Fixed reordering of 8 interleaved complex values in a[0..15].
+ * Complex slot i (1..7) receives the value previously held in slot
+ * src[i]; slot 0 is not touched.
+ */
+void bitrv208neg(double *a)
+{
+    static const int src[8] = {0, 7, 3, 5, 1, 6, 2, 4};
+    double old[16];
+    int i;
+
+    /* Snapshot everything that can be read before overwriting a[]. */
+    for (i = 2; i < 16; i++) {
+        old[i] = a[i];
+    }
+    for (i = 1; i < 8; i++) {
+        a[2 * i] = old[2 * src[i]];
+        a[2 * i + 1] = old[2 * src[i] + 1];
+    }
+}
+
+
+void cftf1st(int n, double *a, double *w) /* One in-place pass of 4-point butterflies over the four quarters of a[0..n-1]; twiddle data is read from w[]. Presumably the forward counterpart of cftb1st - confirm against callers. */
+{
+ int j, j0, j1, j2, j3, k, m, mh;
+ double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
+ wd1r, wd1i, wd3r, wd3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+
+ mh = n >> 3;
+ m = 2 * mh; /* stride (in doubles) between the four quarters: j, j+m, j+2m, j+3m */
+ j1 = m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[0] + a[j2]; /* first butterfly (offset 0): no twiddle multiplication */
+ x0i = a[1] + a[j2 + 1];
+ x1r = a[0] - a[j2];
+ x1i = a[1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ a[j2] = x1r - x3i;
+ a[j2 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ wn4r = w[1]; /* w[1..3] hold three scalar constants used below */
+ csc1 = w[2];
+ csc3 = w[3];
+ wd1r = 1; /* wd1/wd3 start at 1 + 0i and are reloaded from w[] each iteration */
+ wd1i = 0;
+ wd3r = 1;
+ wd3i = 0;
+ k = 0;
+ for (j = 2; j < mh - 2; j += 4) { /* each iteration handles offsets j and j+2, plus the mirrored offsets m-j and m-j-2 */
+ k += 4;
+ wk1r = csc1 * (wd1r + w[k]); /* wk = csc * (previous wd + newly loaded wd); wk is applied at j, wd at j+2 */
+ wk1i = csc1 * (wd1i + w[k + 1]);
+ wk3r = csc3 * (wd3r + w[k + 2]);
+ wk3i = csc3 * (wd3i + w[k + 3]);
+ wd1r = w[k];
+ wd1i = w[k + 1];
+ wd3r = w[k + 2];
+ wd3i = w[k + 3];
+ j1 = j + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j] + a[j2]; /* x*: butterfly at offset j; y*: butterfly at offset j+2 */
+ x0i = a[j + 1] + a[j2 + 1];
+ x1r = a[j] - a[j2];
+ x1i = a[j + 1] - a[j2 + 1];
+ y0r = a[j + 2] + a[j2 + 2];
+ y0i = a[j + 3] + a[j2 + 3];
+ y1r = a[j + 2] - a[j2 + 2];
+ y1i = a[j + 3] - a[j2 + 3];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ y2r = a[j1 + 2] + a[j3 + 2];
+ y2i = a[j1 + 3] + a[j3 + 3];
+ y3r = a[j1 + 2] - a[j3 + 2];
+ y3i = a[j1 + 3] - a[j3 + 3];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j + 2] = y0r + y2r;
+ a[j + 3] = y0i + y2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ a[j1 + 2] = y0r - y2r;
+ a[j1 + 3] = y0i - y2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1r * x0r - wk1i * x0i; /* third quarter: complex multiply by (wk1r + i*wk1i) */
+ a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = y1r - y3i;
+ x0i = y1i + y3r;
+ a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+ a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r + wk3i * x0i; /* fourth quarter: complex multiply by (wk3r - i*wk3i) */
+ a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+ x0r = y1r + y3i;
+ x0i = y1i - y3r;
+ a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+ a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+ j0 = m - j; /* mirrored offsets: same wk/wd values reused with real and imaginary parts exchanged */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] + a[j2];
+ x0i = a[j0 + 1] + a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = a[j0 + 1] - a[j2 + 1];
+ y0r = a[j0 - 2] + a[j2 - 2];
+ y0i = a[j0 - 1] + a[j2 - 1];
+ y1r = a[j0 - 2] - a[j2 - 2];
+ y1i = a[j0 - 1] - a[j2 - 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ y2r = a[j1 - 2] + a[j3 - 2];
+ y2i = a[j1 - 1] + a[j3 - 1];
+ y3r = a[j1 - 2] - a[j3 - 2];
+ y3i = a[j1 - 1] - a[j3 - 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j0 - 2] = y0r + y2r;
+ a[j0 - 1] = y0i + y2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ a[j1 - 2] = y0r - y2r;
+ a[j1 - 1] = y0i - y2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1i * x0r - wk1r * x0i;
+ a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+ x0r = y1r - y3i;
+ x0i = y1i + y3r;
+ a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+ a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3i * x0r + wk3r * x0i;
+ a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+ x0r = y1r + y3i;
+ x0i = y1i - y3r;
+ a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+ a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+ }
+ wk1r = csc1 * (wd1r + wn4r); /* final wk values: last wd combined with wn4r instead of a new w[] entry */
+ wk1i = csc1 * (wd1i + wn4r);
+ wk3r = csc3 * (wd3r - wn4r);
+ wk3i = csc3 * (wd3i - wn4r);
+ j0 = mh; /* the three remaining butterflies sit at offsets mh-2, mh and mh+2 */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0 - 2] + a[j2 - 2]; /* butterfly at offset mh-2 */
+ x0i = a[j0 - 1] + a[j2 - 1];
+ x1r = a[j0 - 2] - a[j2 - 2];
+ x1i = a[j0 - 1] - a[j2 - 1];
+ x2r = a[j1 - 2] + a[j3 - 2];
+ x2i = a[j1 - 1] + a[j3 - 1];
+ x3r = a[j1 - 2] - a[j3 - 2];
+ x3i = a[j1 - 1] - a[j3 - 1];
+ a[j0 - 2] = x0r + x2r;
+ a[j0 - 1] = x0i + x2i;
+ a[j1 - 2] = x0r - x2r;
+ a[j1 - 1] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+ a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+ a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+ x0r = a[j0] + a[j2]; /* butterfly at offset mh: outputs scaled by wn4r only */
+ x0i = a[j0 + 1] + a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = a[j0 + 1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wn4r * (x0r - x0i);
+ a[j2 + 1] = wn4r * (x0i + x0r);
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = -wn4r * (x0r + x0i);
+ a[j3 + 1] = -wn4r * (x0i - x0r);
+ x0r = a[j0 + 2] + a[j2 + 2]; /* butterfly at offset mh+2: wk values with real and imaginary parts exchanged */
+ x0i = a[j0 + 3] + a[j2 + 3];
+ x1r = a[j0 + 2] - a[j2 + 2];
+ x1i = a[j0 + 3] - a[j2 + 3];
+ x2r = a[j1 + 2] + a[j3 + 2];
+ x2i = a[j1 + 3] + a[j3 + 3];
+ x3r = a[j1 + 2] - a[j3 + 2];
+ x3i = a[j1 + 3] - a[j3 + 3];
+ a[j0 + 2] = x0r + x2r;
+ a[j0 + 3] = x0i + x2i;
+ a[j1 + 2] = x0r - x2r;
+ a[j1 + 3] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+ a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+ a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+
+void cftb1st(int n, double *a, double *w) /* One in-place pass of 4-point butterflies over the four quarters of a[0..n-1], called first by cftbsub for n > 32; same layout as cftf1st but the imaginary inputs of the first quarter enter negated. */
+{
+ int j, j0, j1, j2, j3, k, m, mh;
+ double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
+ wd1r, wd1i, wd3r, wd3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+
+ mh = n >> 3;
+ m = 2 * mh; /* stride (in doubles) between the four quarters: j, j+m, j+2m, j+3m */
+ j1 = m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[0] + a[j2]; /* first butterfly (offset 0): no twiddle multiplication */
+ x0i = -a[1] - a[j2 + 1]; /* note the sign pattern on the imaginary parts here and in every x0i/x1i/y0i/y1i below */
+ x1r = a[0] - a[j2];
+ x1i = -a[1] + a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[0] = x0r + x2r;
+ a[1] = x0i - x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i + x2i;
+ a[j2] = x1r + x3i;
+ a[j2 + 1] = x1i + x3r;
+ a[j3] = x1r - x3i;
+ a[j3 + 1] = x1i - x3r;
+ wn4r = w[1]; /* w[1..3] hold three scalar constants used below */
+ csc1 = w[2];
+ csc3 = w[3];
+ wd1r = 1; /* wd1/wd3 start at 1 + 0i and are reloaded from w[] each iteration */
+ wd1i = 0;
+ wd3r = 1;
+ wd3i = 0;
+ k = 0;
+ for (j = 2; j < mh - 2; j += 4) { /* each iteration handles offsets j and j+2, plus the mirrored offsets m-j and m-j-2 */
+ k += 4;
+ wk1r = csc1 * (wd1r + w[k]); /* wk = csc * (previous wd + newly loaded wd); wk is applied at j, wd at j+2 */
+ wk1i = csc1 * (wd1i + w[k + 1]);
+ wk3r = csc3 * (wd3r + w[k + 2]);
+ wk3i = csc3 * (wd3i + w[k + 3]);
+ wd1r = w[k];
+ wd1i = w[k + 1];
+ wd3r = w[k + 2];
+ wd3i = w[k + 3];
+ j1 = j + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j] + a[j2]; /* x*: butterfly at offset j; y*: butterfly at offset j+2 */
+ x0i = -a[j + 1] - a[j2 + 1];
+ x1r = a[j] - a[j2];
+ x1i = -a[j + 1] + a[j2 + 1];
+ y0r = a[j + 2] + a[j2 + 2];
+ y0i = -a[j + 3] - a[j2 + 3];
+ y1r = a[j + 2] - a[j2 + 2];
+ y1i = -a[j + 3] + a[j2 + 3];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ y2r = a[j1 + 2] + a[j3 + 2];
+ y2i = a[j1 + 3] + a[j3 + 3];
+ y3r = a[j1 + 2] - a[j3 + 2];
+ y3i = a[j1 + 3] - a[j3 + 3];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i - x2i;
+ a[j + 2] = y0r + y2r;
+ a[j + 3] = y0i - y2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i + x2i;
+ a[j1 + 2] = y0r - y2r;
+ a[j1 + 3] = y0i + y2i;
+ x0r = x1r + x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1r * x0r - wk1i * x0i; /* third quarter: complex multiply by (wk1r + i*wk1i) */
+ a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = y1r + y3i;
+ x0i = y1i + y3r;
+ a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+ a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r + wk3i * x0i; /* fourth quarter: complex multiply by (wk3r - i*wk3i) */
+ a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+ x0r = y1r - y3i;
+ x0i = y1i - y3r;
+ a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+ a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+ j0 = m - j; /* mirrored offsets: same wk/wd values reused with real and imaginary parts exchanged */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] + a[j2];
+ x0i = -a[j0 + 1] - a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = -a[j0 + 1] + a[j2 + 1];
+ y0r = a[j0 - 2] + a[j2 - 2];
+ y0i = -a[j0 - 1] - a[j2 - 1];
+ y1r = a[j0 - 2] - a[j2 - 2];
+ y1i = -a[j0 - 1] + a[j2 - 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ y2r = a[j1 - 2] + a[j3 - 2];
+ y2i = a[j1 - 1] + a[j3 - 1];
+ y3r = a[j1 - 2] - a[j3 - 2];
+ y3i = a[j1 - 1] - a[j3 - 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i - x2i;
+ a[j0 - 2] = y0r + y2r;
+ a[j0 - 1] = y0i - y2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i + x2i;
+ a[j1 - 2] = y0r - y2r;
+ a[j1 - 1] = y0i + y2i;
+ x0r = x1r + x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1i * x0r - wk1r * x0i;
+ a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+ x0r = y1r + y3i;
+ x0i = y1i + y3r;
+ a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+ a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3i * x0r + wk3r * x0i;
+ a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+ x0r = y1r - y3i;
+ x0i = y1i - y3r;
+ a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+ a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+ }
+ wk1r = csc1 * (wd1r + wn4r); /* final wk values: last wd combined with wn4r instead of a new w[] entry */
+ wk1i = csc1 * (wd1i + wn4r);
+ wk3r = csc3 * (wd3r - wn4r);
+ wk3i = csc3 * (wd3i - wn4r);
+ j0 = mh; /* the three remaining butterflies sit at offsets mh-2, mh and mh+2 */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0 - 2] + a[j2 - 2]; /* butterfly at offset mh-2 */
+ x0i = -a[j0 - 1] - a[j2 - 1];
+ x1r = a[j0 - 2] - a[j2 - 2];
+ x1i = -a[j0 - 1] + a[j2 - 1];
+ x2r = a[j1 - 2] + a[j3 - 2];
+ x2i = a[j1 - 1] + a[j3 - 1];
+ x3r = a[j1 - 2] - a[j3 - 2];
+ x3i = a[j1 - 1] - a[j3 - 1];
+ a[j0 - 2] = x0r + x2r;
+ a[j0 - 1] = x0i - x2i;
+ a[j1 - 2] = x0r - x2r;
+ a[j1 - 1] = x0i + x2i;
+ x0r = x1r + x3i;
+ x0i = x1i + x3r;
+ a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+ a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i - x3r;
+ a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+ a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+ x0r = a[j0] + a[j2]; /* butterfly at offset mh: outputs scaled by wn4r only */
+ x0i = -a[j0 + 1] - a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = -a[j0 + 1] + a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i - x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i + x2i;
+ x0r = x1r + x3i;
+ x0i = x1i + x3r;
+ a[j2] = wn4r * (x0r - x0i);
+ a[j2 + 1] = wn4r * (x0i + x0r);
+ x0r = x1r - x3i;
+ x0i = x1i - x3r;
+ a[j3] = -wn4r * (x0r + x0i);
+ a[j3 + 1] = -wn4r * (x0i - x0r);
+ x0r = a[j0 + 2] + a[j2 + 2]; /* butterfly at offset mh+2: wk values with real and imaginary parts exchanged */
+ x0i = -a[j0 + 3] - a[j2 + 3];
+ x1r = a[j0 + 2] - a[j2 + 2];
+ x1i = -a[j0 + 3] + a[j2 + 3];
+ x2r = a[j1 + 2] + a[j3 + 2];
+ x2i = a[j1 + 3] + a[j3 + 3];
+ x3r = a[j1 + 2] - a[j3 + 2];
+ x3i = a[j1 + 3] - a[j3 + 3];
+ a[j0 + 2] = x0r + x2r;
+ a[j0 + 3] = x0i - x2i;
+ a[j1 + 2] = x0r - x2r;
+ a[j1 + 3] = x0i + x2i;
+ x0r = x1r + x3i;
+ x0i = x1i + x3r;
+ a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+ a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+ x0r = x1r - x3i;
+ x0i = x1i - x3r;
+ a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+ a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+
+#ifdef USE_CDFT_THREADS
+struct cdft_arg_st { /* per-thread argument bundle that cftrec4_th passes to cftrec1_th / cftrec2_th */
+ int n0; /* cftrec4_th stores its full length n here; the workers start their stage loop from it */
+ int n; /* length of the slice given to this thread (n/2 or n/4 in cftrec4_th) */
+ double *a; /* start of this thread's slice: &a[i * m] for thread i */
+ int nw; /* forwarded unchanged from cftrec4_th's nw */
+ double *w; /* forwarded unchanged from cftrec4_th's w */
+};
+typedef struct cdft_arg_st cdft_arg_t; /* short alias used for the ag[] array and the casts in the workers */
+
+
+/*
+ * Threaded middle stage: splits a[0..n-1] into 2 equal slices, or 4 when
+ * n > CDFT_4THREADS_BEGIN_N, and runs one worker thread per slice.
+ * Slice number idiv4 (0 with two threads, 1 with four) is handled by
+ * cftrec2_th, every other slice by cftrec1_th.  Returns after all
+ * workers have been waited for.
+ */
+void cftrec4_th(int n, double *a, int nw, double *w)
+{
+    void *cftrec1_th(void *p);
+    void *cftrec2_th(void *p);
+    int i, idiv4, m, nthread;
+    cdft_thread_t th[4];
+    cdft_arg_t ag[4];
+
+    if (n > CDFT_4THREADS_BEGIN_N) {
+        nthread = 4;
+        idiv4 = 1;
+        m = n >> 2;
+    } else {
+        nthread = 2;
+        idiv4 = 0;
+        m = n >> 1;
+    }
+
+    /* Describe each slice, then start its worker. */
+    for (i = 0; i < nthread; i++) {
+        ag[i].w = w;
+        ag[i].nw = nw;
+        ag[i].a = &a[i * m];
+        ag[i].n = m;
+        ag[i].n0 = n;
+        if (i == idiv4) {
+            cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
+        } else {
+            cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
+        }
+    }
+
+    /* Join in creation order. */
+    for (i = 0; i < nthread; i++) {
+        cdft_thread_wait(th[i]);
+    }
+}
+
+
+/*
+ * Thread entry point: same stage sequence as cftrec4, applied to the
+ * slice described by the cdft_arg_t that p points to.  The block length
+ * starts from the full length n0 while all indexing is relative to the
+ * slice (length n).  Always returns a null pointer.
+ */
+void *cftrec1_th(void *p)
+{
+    int cfttree(int n, int j, int k, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftmdl1(int n, double *a, double *w);
+    const cdft_arg_t *arg = (cdft_arg_t *) p;
+    int n = arg->n;
+    int nw = arg->nw;
+    double *a = arg->a;
+    double *w = arg->w;
+    int isplt, pos, blk, len;
+
+    /* Quarter the block length until it is at most 512, working on the
+       last block of the slice each time. */
+    for (len = arg->n0; len > 512; ) {
+        len >>= 2;
+        cftmdl1(len, &a[n - len], &w[nw - (len >> 1)]);
+    }
+    cftleaf(len, 1, &a[n - len], nw, w);
+
+    /* Walk the remaining blocks from the end of the slice to its start;
+       blk counts them from 1. */
+    for (blk = 1, pos = n - len; pos > 0; pos -= len, blk++) {
+        isplt = cfttree(len, pos, blk, a, nw, w);
+        cftleaf(len, isplt, &a[pos - len], nw, w);
+    }
+    return (void *) 0;
+}
+
+
+void *cftrec2_th(void *p)
+{
+ int cfttree(int n, int j, int k, double *a, int nw, double *w);
+ void cftleaf(int n, int isplt, double *a, int nw, double *w);
+ void cftmdl2(int n, double *a, double *w);
+ int isplt, j, k, m, n, n0, nw;
+ double *a, *w;
+
+ n0 = ((cdft_arg_t *) p)->n0;
+ n = ((cdft_arg_t *) p)->n;
+ a = ((cdft_arg_t *) p)->a;
+ nw = ((cdft_arg_t *) p)->nw;
+ w = ((cdft_arg_t *) p)->w;
+ k = 1;
+ m = n0;
+ while (m > 512) {
+ m >>= 2;
+ k <<= 2;
+ cftmdl2(m, &a[n - m], &w[nw - m]);
+ }
+ cftleaf(m, 0, &a[n - m], nw, w);
+ k >>= 1;
+ for (j = n - m; j > 0; j -= m) {
+ k++;
+ isplt = cfttree(m, j, k, a, nw, w);
+ cftleaf(m, isplt, &a[j - m], nw, w);
+ }
+ return (void *) 0;
+}
+#endif /* USE_CDFT_THREADS */
+
+
+/*
+ * Single-threaded middle stage for a[0..n-1].
+ *
+ * The block length is quartered (cftmdl1 on the last block each time)
+ * until it is at most 512, that last block is finished with cftleaf, and
+ * the remaining blocks are then processed from the end of the array
+ * towards its start, with cfttree deciding the isplt flag for each leaf.
+ */
+void cftrec4(int n, double *a, int nw, double *w)
+{
+    int cfttree(int n, int j, int k, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftmdl1(int n, double *a, double *w);
+    int isplt, pos, blk, len;
+
+    for (len = n; len > 512; ) {
+        len >>= 2;
+        cftmdl1(len, &a[n - len], &w[nw - (len >> 1)]);
+    }
+    cftleaf(len, 1, &a[n - len], nw, w);
+
+    /* blk numbers the blocks from 1, starting with the one that ends at
+       n - len. */
+    for (blk = 1, pos = n - len; pos > 0; pos -= len, blk++) {
+        isplt = cfttree(len, pos, blk, a, nw, w);
+        cftleaf(len, isplt, &a[pos - len], nw, w);
+    }
+}
+
+
+/*
+ * Runs the cftmdl1/cftmdl2 stages that precede the leaf ending at a[j],
+ * and returns the isplt flag (0 or 1) to pass to cftleaf for that leaf.
+ *
+ * k is the caller's block counter.  When k is not a multiple of 4 a
+ * single stage of length n is run; otherwise the stage length is
+ * multiplied by 4 for every factor of 4 in k and stages are run for each
+ * length down to (but excluding) 128.  The low bit of k after those
+ * factors of 4 are removed selects cftmdl1 (1) or cftmdl2 (0).
+ *
+ * NOTE(review): k == 0 would never leave the factor-stripping loop; the
+ * callers visible in this file always start k at 1 or above.
+ */
+int cfttree(int n, int j, int k, double *a, int nw, double *w)
+{
+    void cftmdl1(int n, double *a, double *w);
+    void cftmdl2(int n, double *a, double *w);
+    int bits, len, odd;
+
+    if ((k & 3) != 0) {
+        odd = k & 1;
+        if (odd == 0) {
+            cftmdl2(n, &a[j - n], &w[nw - n]);
+        } else {
+            cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
+        }
+        return odd;
+    }
+
+    /* k is a multiple of 4: grow the stage length once per factor of 4. */
+    len = n;
+    bits = k;
+    while ((bits & 3) == 0) {
+        len <<= 2;
+        bits >>= 2;
+    }
+    odd = bits & 1;
+    if (odd == 0) {
+        for (; len > 128; len >>= 2) {
+            cftmdl2(len, &a[j - len], &w[nw - len]);
+        }
+    } else {
+        for (; len > 128; len >>= 2) {
+            cftmdl1(len, &a[j - len], &w[nw - (len >> 1)]);
+        }
+    }
+    return odd;
+}
+
+
+/*
+ * Fully unrolled leaf stage for one block of a[].
+ *
+ * n == 512 takes the first schedule (four 128-double groups finished by
+ * cftf161/cftf162); every other n takes the second one (four 64-double
+ * groups finished by cftf081/cftf082).  isplt only affects the last
+ * group: non-zero uses cftmdl1 and the "1" kernel on its final
+ * sub-block, zero uses cftmdl2 and the "2" kernel.
+ * Twiddle tables are taken from the tail of w[] (offsets relative to nw).
+ */
+void cftleaf(int n, int isplt, double *a, int nw, double *w)
+{
+    void cftmdl1(int n, double *a, double *w);
+    void cftmdl2(int n, double *a, double *w);
+    void cftf161(double *a, double *w);
+    void cftf162(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftf082(double *a, double *w);
+
+    if (n == 512) {
+        double *tw8 = &w[nw - 8];
+        double *tw32 = &w[nw - 32];
+        double *tw64 = &w[nw - 64];
+        double *tw128 = &w[nw - 128];
+
+        /* group 0: a[0..127] */
+        cftmdl1(128, a, tw64);
+        cftf161(a, tw8);
+        cftf162(a + 32, tw32);
+        cftf161(a + 64, tw8);
+        cftf161(a + 96, tw8);
+        /* group 1: a[128..255] */
+        cftmdl2(128, a + 128, tw128);
+        cftf161(a + 128, tw8);
+        cftf162(a + 160, tw32);
+        cftf161(a + 192, tw8);
+        cftf162(a + 224, tw32);
+        /* group 2: a[256..383] */
+        cftmdl1(128, a + 256, tw64);
+        cftf161(a + 256, tw8);
+        cftf162(a + 288, tw32);
+        cftf161(a + 320, tw8);
+        cftf161(a + 352, tw8);
+        /* group 3: a[384..511], variant chosen by isplt */
+        if (isplt == 0) {
+            cftmdl2(128, a + 384, tw128);
+            cftf162(a + 480, tw32);
+        } else {
+            cftmdl1(128, a + 384, tw64);
+            cftf161(a + 480, tw8);
+        }
+        cftf161(a + 384, tw8);
+        cftf162(a + 416, tw32);
+        cftf161(a + 448, tw8);
+    } else {
+        double *tw8 = &w[nw - 8];
+        double *tw32 = &w[nw - 32];
+        double *tw64 = &w[nw - 64];
+
+        /* group 0: a[0..63] */
+        cftmdl1(64, a, tw32);
+        cftf081(a, tw8);
+        cftf082(a + 16, tw8);
+        cftf081(a + 32, tw8);
+        cftf081(a + 48, tw8);
+        /* group 1: a[64..127] */
+        cftmdl2(64, a + 64, tw64);
+        cftf081(a + 64, tw8);
+        cftf082(a + 80, tw8);
+        cftf081(a + 96, tw8);
+        cftf082(a + 112, tw8);
+        /* group 2: a[128..191] */
+        cftmdl1(64, a + 128, tw32);
+        cftf081(a + 128, tw8);
+        cftf082(a + 144, tw8);
+        cftf081(a + 160, tw8);
+        cftf081(a + 176, tw8);
+        /* group 3: a[192..255], variant chosen by isplt */
+        if (isplt == 0) {
+            cftmdl2(64, a + 192, tw64);
+            cftf082(a + 240, tw8);
+        } else {
+            cftmdl1(64, a + 192, tw32);
+            cftf081(a + 240, tw8);
+        }
+        cftf081(a + 192, tw8);
+        cftf082(a + 208, tw8);
+        cftf081(a + 224, tw8);
+    }
+}
+
+
+void cftmdl1(int n, double *a, double *w) /* One in-place pass of 4-point butterflies over the four quarters of a[0..n-1], with twiddle factors read directly from w[]. */
+{
+ int j, j0, j1, j2, j3, k, m, mh;
+ double wn4r, wk1r, wk1i, wk3r, wk3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+ mh = n >> 3;
+ m = 2 * mh; /* stride (in doubles) between the four quarters: j, j+m, j+2m, j+3m */
+ j1 = m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[0] + a[j2]; /* first butterfly (offset 0): no twiddle multiplication */
+ x0i = a[1] + a[j2 + 1];
+ x1r = a[0] - a[j2];
+ x1i = a[1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ a[j2] = x1r - x3i;
+ a[j2 + 1] = x1i + x3r;
+ a[j3] = x1r + x3i;
+ a[j3 + 1] = x1i - x3r;
+ wn4r = w[1]; /* scalar used only for the middle butterfly after the loop */
+ k = 0;
+ for (j = 2; j < mh; j += 2) { /* each iteration handles offset j and the mirrored offset m-j */
+ k += 4;
+ wk1r = w[k]; /* four twiddle values per iteration: (wk1r, wk1i) and (wk3r, wk3i) */
+ wk1i = w[k + 1];
+ wk3r = w[k + 2];
+ wk3i = w[k + 3];
+ j1 = j + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j] + a[j2];
+ x0i = a[j + 1] + a[j2 + 1];
+ x1r = a[j] - a[j2];
+ x1i = a[j + 1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[j] = x0r + x2r;
+ a[j + 1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1r * x0r - wk1i * x0i; /* third quarter: complex multiply by (wk1r + i*wk1i) */
+ a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3r * x0r + wk3i * x0i; /* fourth quarter: complex multiply by (wk3r - i*wk3i) */
+ a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+ j0 = m - j; /* mirrored offset: same twiddles reused with real and imaginary parts exchanged */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] + a[j2];
+ x0i = a[j0 + 1] + a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = a[j0 + 1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wk1i * x0r - wk1r * x0i;
+ a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = wk3i * x0r + wk3r * x0i;
+ a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+ }
+ j0 = mh; /* remaining middle butterfly at offset mh: outputs scaled by wn4r only */
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] + a[j2];
+ x0i = a[j0 + 1] + a[j2 + 1];
+ x1r = a[j0] - a[j2];
+ x1i = a[j0 + 1] - a[j2 + 1];
+ x2r = a[j1] + a[j3];
+ x2i = a[j1 + 1] + a[j3 + 1];
+ x3r = a[j1] - a[j3];
+ x3i = a[j1 + 1] - a[j3 + 1];
+ a[j0] = x0r + x2r;
+ a[j0 + 1] = x0i + x2i;
+ a[j1] = x0r - x2r;
+ a[j1 + 1] = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ a[j2] = wn4r * (x0r - x0i);
+ a[j2 + 1] = wn4r * (x0i + x0r);
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ a[j3] = -wn4r * (x0r + x0i);
+ a[j3 + 1] = -wn4r * (x0i - x0r);
+}
+
+
+/*
+ * In-place butterfly pass over a[] using coefficients read from w[].
+ *
+ * a[] is accessed as (a[j], a[j + 1]) pairs -- real and imaginary parts,
+ * going by the r/i suffixes of the locals -- in four sub-blocks that start
+ * at offsets 0, m, 2 * m and 3 * m, where m = 2 * (n >> 3).
+ * w[] is read at w[1], w[m], w[m + 1] and, inside the loop, forward from
+ * w[4] and backward from w[2 * m - 1].
+ *
+ * NOTE(review): presumably one of the middle stages of the split-radix
+ * complex FFT; assumes n is a multiple of 8 and that a[] holds at least
+ * n doubles -- confirm against the caller.
+ */
+void cftmdl2(int n, double *a, double *w)
+{
+ int j, j0, j1, j2, j3, k, kr, m, mh;
+ double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
+ double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
+
+ mh = n >> 3;
+ m = 2 * mh;
+ wn4r = w[1];
+ /* First pair of every sub-block (j = 0): only wn4r is needed. */
+ j1 = m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[0] - a[j2 + 1];
+ x0i = a[1] + a[j2];
+ x1r = a[0] + a[j2 + 1];
+ x1i = a[1] - a[j2];
+ x2r = a[j1] - a[j3 + 1];
+ x2i = a[j1 + 1] + a[j3];
+ x3r = a[j1] + a[j3 + 1];
+ x3i = a[j1 + 1] - a[j3];
+ y0r = wn4r * (x2r - x2i);
+ y0i = wn4r * (x2i + x2r);
+ a[0] = x0r + y0r;
+ a[1] = x0i + y0i;
+ a[j1] = x0r - y0r;
+ a[j1 + 1] = x0i - y0i;
+ y0r = wn4r * (x3r - x3i);
+ y0i = wn4r * (x3i + x3r);
+ a[j2] = x1r - y0i;
+ a[j2 + 1] = x1i + y0r;
+ a[j3] = x1r + y0i;
+ a[j3 + 1] = x1i - y0r;
+ /* k walks w[] forward and kr walks it backward from 2 * m, four entries */
+ /* per iteration; each iteration handles pair j and its mirror m - j. */
+ k = 0;
+ kr = 2 * m;
+ for (j = 2; j < mh; j += 2) {
+ k += 4;
+ wk1r = w[k];
+ wk1i = w[k + 1];
+ wk3r = w[k + 2];
+ wk3i = w[k + 3];
+ kr -= 4;
+ wd1i = w[kr];
+ wd1r = w[kr + 1];
+ wd3i = w[kr + 2];
+ wd3r = w[kr + 3];
+ j1 = j + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j] - a[j2 + 1];
+ x0i = a[j + 1] + a[j2];
+ x1r = a[j] + a[j2 + 1];
+ x1i = a[j + 1] - a[j2];
+ x2r = a[j1] - a[j3 + 1];
+ x2i = a[j1 + 1] + a[j3];
+ x3r = a[j1] + a[j3 + 1];
+ x3i = a[j1 + 1] - a[j3];
+ y0r = wk1r * x0r - wk1i * x0i;
+ y0i = wk1r * x0i + wk1i * x0r;
+ y2r = wd1r * x2r - wd1i * x2i;
+ y2i = wd1r * x2i + wd1i * x2r;
+ a[j] = y0r + y2r;
+ a[j + 1] = y0i + y2i;
+ a[j1] = y0r - y2r;
+ a[j1 + 1] = y0i - y2i;
+ y0r = wk3r * x1r + wk3i * x1i;
+ y0i = wk3r * x1i - wk3i * x1r;
+ y2r = wd3r * x3r + wd3i * x3i;
+ y2i = wd3r * x3i - wd3i * x3r;
+ a[j2] = y0r + y2r;
+ a[j2 + 1] = y0i + y2i;
+ a[j3] = y0r - y2r;
+ a[j3 + 1] = y0i - y2i;
+ /* Mirror pair j0 = m - j: same butterfly with the wk and wd */
+ /* coefficients exchanged and their r/i parts swapped. */
+ j0 = m - j;
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] - a[j2 + 1];
+ x0i = a[j0 + 1] + a[j2];
+ x1r = a[j0] + a[j2 + 1];
+ x1i = a[j0 + 1] - a[j2];
+ x2r = a[j1] - a[j3 + 1];
+ x2i = a[j1 + 1] + a[j3];
+ x3r = a[j1] + a[j3 + 1];
+ x3i = a[j1 + 1] - a[j3];
+ y0r = wd1i * x0r - wd1r * x0i;
+ y0i = wd1i * x0i + wd1r * x0r;
+ y2r = wk1i * x2r - wk1r * x2i;
+ y2i = wk1i * x2i + wk1r * x2r;
+ a[j0] = y0r + y2r;
+ a[j0 + 1] = y0i + y2i;
+ a[j1] = y0r - y2r;
+ a[j1 + 1] = y0i - y2i;
+ y0r = wd3i * x1r + wd3r * x1i;
+ y0i = wd3i * x1i - wd3r * x1r;
+ y2r = wk3i * x3r + wk3r * x3i;
+ y2i = wk3i * x3i - wk3r * x3r;
+ a[j2] = y0r + y2r;
+ a[j2 + 1] = y0i + y2i;
+ a[j3] = y0r - y2r;
+ a[j3 + 1] = y0i - y2i;
+ }
+ /* Middle pair j0 = mh, handled outside the loop with w[m], w[m + 1]. */
+ wk1r = w[m];
+ wk1i = w[m + 1];
+ j0 = mh;
+ j1 = j0 + m;
+ j2 = j1 + m;
+ j3 = j2 + m;
+ x0r = a[j0] - a[j2 + 1];
+ x0i = a[j0 + 1] + a[j2];
+ x1r = a[j0] + a[j2 + 1];
+ x1i = a[j0 + 1] - a[j2];
+ x2r = a[j1] - a[j3 + 1];
+ x2i = a[j1 + 1] + a[j3];
+ x3r = a[j1] + a[j3 + 1];
+ x3i = a[j1 + 1] - a[j3];
+ y0r = wk1r * x0r - wk1i * x0i;
+ y0i = wk1r * x0i + wk1i * x0r;
+ y2r = wk1i * x2r - wk1r * x2i;
+ y2i = wk1i * x2i + wk1r * x2r;
+ a[j0] = y0r + y2r;
+ a[j0 + 1] = y0i + y2i;
+ a[j1] = y0r - y2r;
+ a[j1 + 1] = y0i - y2i;
+ y0r = wk1i * x1r - wk1r * x1i;
+ y0i = wk1i * x1i + wk1r * x1r;
+ y2r = wk1r * x3r - wk1i * x3i;
+ y2i = wk1r * x3i + wk1i * x3r;
+ a[j2] = y0r - y2r;
+ a[j2 + 1] = y0i - y2i;
+ a[j3] = y0r + y2r;
+ a[j3 + 1] = y0i + y2i;
+}
+
+
+/*
+ * Dispatch to the unrolled leaf kernels for a four-block transform.
+ *
+ * n == 128: four 32-double blocks, kernels cftf161 / cftf162.
+ * otherwise: four 16-double blocks, kernels cftf081 / cftf082.
+ * In both cases only the second block uses the "2" variant of the kernel.
+ * Coefficients are taken from the tail of w[] (nw is its length as passed
+ * by the caller).
+ */
+void cftfx41(int n, double *a, int nw, double *w)
+{
+    void cftf161(double *a, double *w);
+    void cftf162(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftf082(double *a, double *w);
+    double *w_last8 = &w[nw - 8];
+
+    if (n != 128) {
+        cftf081(a, w_last8);
+        cftf082(&a[16], w_last8);
+        cftf081(&a[32], w_last8);
+        cftf081(&a[48], w_last8);
+        return;
+    }
+    cftf161(a, w_last8);
+    cftf162(&a[32], &w[nw - 32]);
+    cftf161(&a[64], w_last8);
+    cftf161(&a[96], w_last8);
+}
+
+
+/*
+ * Fully unrolled in-place kernel over a[0..31], read and written as 16
+ * (a[2k], a[2k + 1]) pairs; uses the coefficients w[1], w[2] and w[3].
+ *
+ * All inputs are loaded into the y* temporaries before any element of a[]
+ * is overwritten.
+ * NOTE(review): presumably the 16-point complex FFT leaf of the package --
+ * confirm against the upstream documentation.
+ */
+void cftf161(double *a, double *w)
+{
+ double wn4r, wk1r, wk1i,
+ x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+ y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
+ y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
+ y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+
+ wn4r = w[1];
+ wk1r = w[2];
+ wk1i = w[3];
+ /* First stage, group 0: a[0], a[8], a[16], a[24] -> y0, y4, y8, y12. */
+ x0r = a[0] + a[16];
+ x0i = a[1] + a[17];
+ x1r = a[0] - a[16];
+ x1i = a[1] - a[17];
+ x2r = a[8] + a[24];
+ x2i = a[9] + a[25];
+ x3r = a[8] - a[24];
+ x3i = a[9] - a[25];
+ y0r = x0r + x2r;
+ y0i = x0i + x2i;
+ y4r = x0r - x2r;
+ y4i = x0i - x2i;
+ y8r = x1r - x3i;
+ y8i = x1i + x3r;
+ y12r = x1r + x3i;
+ y12i = x1i - x3r;
+ /* Group 1: a[2], a[10], a[18], a[26] -> y1, y5, y9, y13. */
+ x0r = a[2] + a[18];
+ x0i = a[3] + a[19];
+ x1r = a[2] - a[18];
+ x1i = a[3] - a[19];
+ x2r = a[10] + a[26];
+ x2i = a[11] + a[27];
+ x3r = a[10] - a[26];
+ x3i = a[11] - a[27];
+ y1r = x0r + x2r;
+ y1i = x0i + x2i;
+ y5r = x0r - x2r;
+ y5i = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ y9r = wk1r * x0r - wk1i * x0i;
+ y9i = wk1r * x0i + wk1i * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ y13r = wk1i * x0r - wk1r * x0i;
+ y13i = wk1i * x0i + wk1r * x0r;
+ /* Group 2: a[4], a[12], a[20], a[28] -> y2, y6, y10, y14. */
+ x0r = a[4] + a[20];
+ x0i = a[5] + a[21];
+ x1r = a[4] - a[20];
+ x1i = a[5] - a[21];
+ x2r = a[12] + a[28];
+ x2i = a[13] + a[29];
+ x3r = a[12] - a[28];
+ x3i = a[13] - a[29];
+ y2r = x0r + x2r;
+ y2i = x0i + x2i;
+ y6r = x0r - x2r;
+ y6i = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ y10r = wn4r * (x0r - x0i);
+ y10i = wn4r * (x0i + x0r);
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ y14r = wn4r * (x0r + x0i);
+ y14i = wn4r * (x0i - x0r);
+ /* Group 3: a[6], a[14], a[22], a[30] -> y3, y7, y11, y15. */
+ x0r = a[6] + a[22];
+ x0i = a[7] + a[23];
+ x1r = a[6] - a[22];
+ x1i = a[7] - a[23];
+ x2r = a[14] + a[30];
+ x2i = a[15] + a[31];
+ x3r = a[14] - a[30];
+ x3i = a[15] - a[31];
+ y3r = x0r + x2r;
+ y3i = x0i + x2i;
+ y7r = x0r - x2r;
+ y7i = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ y11r = wk1i * x0r - wk1r * x0i;
+ y11i = wk1i * x0i + wk1r * x0r;
+ x0r = x1r + x3i;
+ x0i = x1i - x3r;
+ y15r = wk1r * x0r - wk1i * x0i;
+ y15i = wk1r * x0i + wk1i * x0r;
+ /* Second stage: y12..y15 -> a[24..31]. */
+ x0r = y12r - y14r;
+ x0i = y12i - y14i;
+ x1r = y12r + y14r;
+ x1i = y12i + y14i;
+ x2r = y13r - y15r;
+ x2i = y13i - y15i;
+ x3r = y13r + y15r;
+ x3i = y13i + y15i;
+ a[24] = x0r + x2r;
+ a[25] = x0i + x2i;
+ a[26] = x0r - x2r;
+ a[27] = x0i - x2i;
+ a[28] = x1r - x3i;
+ a[29] = x1i + x3r;
+ a[30] = x1r + x3i;
+ a[31] = x1i - x3r;
+ /* y8..y11 -> a[16..23]. */
+ x0r = y8r + y10r;
+ x0i = y8i + y10i;
+ x1r = y8r - y10r;
+ x1i = y8i - y10i;
+ x2r = y9r + y11r;
+ x2i = y9i + y11i;
+ x3r = y9r - y11r;
+ x3i = y9i - y11i;
+ a[16] = x0r + x2r;
+ a[17] = x0i + x2i;
+ a[18] = x0r - x2r;
+ a[19] = x0i - x2i;
+ a[20] = x1r - x3i;
+ a[21] = x1i + x3r;
+ a[22] = x1r + x3i;
+ a[23] = x1i - x3r;
+ /* y4..y7 -> a[8..15]; the y5/y7 combinations are scaled by wn4r. */
+ x0r = y5r - y7i;
+ x0i = y5i + y7r;
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ x0r = y5r + y7i;
+ x0i = y5i - y7r;
+ x3r = wn4r * (x0r - x0i);
+ x3i = wn4r * (x0i + x0r);
+ x0r = y4r - y6i;
+ x0i = y4i + y6r;
+ x1r = y4r + y6i;
+ x1i = y4i - y6r;
+ a[8] = x0r + x2r;
+ a[9] = x0i + x2i;
+ a[10] = x0r - x2r;
+ a[11] = x0i - x2i;
+ a[12] = x1r - x3i;
+ a[13] = x1i + x3r;
+ a[14] = x1r + x3i;
+ a[15] = x1i - x3r;
+ /* y0..y3 -> a[0..7]. */
+ x0r = y0r + y2r;
+ x0i = y0i + y2i;
+ x1r = y0r - y2r;
+ x1i = y0i - y2i;
+ x2r = y1r + y3r;
+ x2i = y1i + y3i;
+ x3r = y1r - y3r;
+ x3i = y1i - y3i;
+ a[0] = x0r + x2r;
+ a[1] = x0i + x2i;
+ a[2] = x0r - x2r;
+ a[3] = x0i - x2i;
+ a[4] = x1r - x3i;
+ a[5] = x1i + x3r;
+ a[6] = x1r + x3i;
+ a[7] = x1i - x3r;
+}
+
+
+/*
+ * Fully unrolled in-place kernel over a[0..31], read and written as 16
+ * (a[2k], a[2k + 1]) pairs; uses the coefficients w[1] and w[4..9]
+ * (w[7] is negated on load).
+ *
+ * Unlike cftf161, the first stage mixes the real part of one pair with the
+ * imaginary part of the pair 16 doubles further on (e.g. a[0] - a[17]).
+ * All inputs are loaded into the y* temporaries before any element of a[]
+ * is overwritten.
+ * NOTE(review): presumably the twiddled 16-point leaf used for the second
+ * block in cftfx41 -- confirm against the upstream documentation.
+ */
+void cftf162(double *a, double *w)
+{
+ double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
+ x0r, x0i, x1r, x1i, x2r, x2i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+ y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
+ y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
+ y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+
+ wn4r = w[1];
+ wk1r = w[4];
+ wk1i = w[5];
+ wk3r = w[6];
+ wk3i = -w[7];
+ wk2r = w[8];
+ wk2i = w[9];
+ /* First stage, a[0], a[8], a[16], a[24] -> y0, y4, y8, y12. */
+ x1r = a[0] - a[17];
+ x1i = a[1] + a[16];
+ x0r = a[8] - a[25];
+ x0i = a[9] + a[24];
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ y0r = x1r + x2r;
+ y0i = x1i + x2i;
+ y4r = x1r - x2r;
+ y4i = x1i - x2i;
+ x1r = a[0] + a[17];
+ x1i = a[1] - a[16];
+ x0r = a[8] + a[25];
+ x0i = a[9] - a[24];
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ y8r = x1r - x2i;
+ y8i = x1i + x2r;
+ y12r = x1r + x2i;
+ y12i = x1i - x2r;
+ /* a[2], a[10], a[18], a[26] -> y1, y5, y9, y13. */
+ x0r = a[2] - a[19];
+ x0i = a[3] + a[18];
+ x1r = wk1r * x0r - wk1i * x0i;
+ x1i = wk1r * x0i + wk1i * x0r;
+ x0r = a[10] - a[27];
+ x0i = a[11] + a[26];
+ x2r = wk3i * x0r - wk3r * x0i;
+ x2i = wk3i * x0i + wk3r * x0r;
+ y1r = x1r + x2r;
+ y1i = x1i + x2i;
+ y5r = x1r - x2r;
+ y5i = x1i - x2i;
+ x0r = a[2] + a[19];
+ x0i = a[3] - a[18];
+ x1r = wk3r * x0r - wk3i * x0i;
+ x1i = wk3r * x0i + wk3i * x0r;
+ x0r = a[10] + a[27];
+ x0i = a[11] - a[26];
+ x2r = wk1r * x0r + wk1i * x0i;
+ x2i = wk1r * x0i - wk1i * x0r;
+ y9r = x1r - x2r;
+ y9i = x1i - x2i;
+ y13r = x1r + x2r;
+ y13i = x1i + x2i;
+ /* a[4], a[12], a[20], a[28] -> y2, y6, y10, y14. */
+ x0r = a[4] - a[21];
+ x0i = a[5] + a[20];
+ x1r = wk2r * x0r - wk2i * x0i;
+ x1i = wk2r * x0i + wk2i * x0r;
+ x0r = a[12] - a[29];
+ x0i = a[13] + a[28];
+ x2r = wk2i * x0r - wk2r * x0i;
+ x2i = wk2i * x0i + wk2r * x0r;
+ y2r = x1r + x2r;
+ y2i = x1i + x2i;
+ y6r = x1r - x2r;
+ y6i = x1i - x2i;
+ x0r = a[4] + a[21];
+ x0i = a[5] - a[20];
+ x1r = wk2i * x0r - wk2r * x0i;
+ x1i = wk2i * x0i + wk2r * x0r;
+ x0r = a[12] + a[29];
+ x0i = a[13] - a[28];
+ x2r = wk2r * x0r - wk2i * x0i;
+ x2i = wk2r * x0i + wk2i * x0r;
+ y10r = x1r - x2r;
+ y10i = x1i - x2i;
+ y14r = x1r + x2r;
+ y14i = x1i + x2i;
+ /* a[6], a[14], a[22], a[30] -> y3, y7, y11, y15. */
+ x0r = a[6] - a[23];
+ x0i = a[7] + a[22];
+ x1r = wk3r * x0r - wk3i * x0i;
+ x1i = wk3r * x0i + wk3i * x0r;
+ x0r = a[14] - a[31];
+ x0i = a[15] + a[30];
+ x2r = wk1i * x0r - wk1r * x0i;
+ x2i = wk1i * x0i + wk1r * x0r;
+ y3r = x1r + x2r;
+ y3i = x1i + x2i;
+ y7r = x1r - x2r;
+ y7i = x1i - x2i;
+ x0r = a[6] + a[23];
+ x0i = a[7] - a[22];
+ x1r = wk1i * x0r + wk1r * x0i;
+ x1i = wk1i * x0i - wk1r * x0r;
+ x0r = a[14] + a[31];
+ x0i = a[15] - a[30];
+ x2r = wk3i * x0r - wk3r * x0i;
+ x2i = wk3i * x0i + wk3r * x0r;
+ y11r = x1r + x2r;
+ y11i = x1i + x2i;
+ y15r = x1r - x2r;
+ y15i = x1i - x2i;
+ /* Second stage: y0..y3 -> a[0..7]. */
+ x1r = y0r + y2r;
+ x1i = y0i + y2i;
+ x2r = y1r + y3r;
+ x2i = y1i + y3i;
+ a[0] = x1r + x2r;
+ a[1] = x1i + x2i;
+ a[2] = x1r - x2r;
+ a[3] = x1i - x2i;
+ x1r = y0r - y2r;
+ x1i = y0i - y2i;
+ x2r = y1r - y3r;
+ x2i = y1i - y3i;
+ a[4] = x1r - x2i;
+ a[5] = x1i + x2r;
+ a[6] = x1r + x2i;
+ a[7] = x1i - x2r;
+ /* y4..y7 -> a[8..15]. */
+ x1r = y4r - y6i;
+ x1i = y4i + y6r;
+ x0r = y5r - y7i;
+ x0i = y5i + y7r;
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ a[8] = x1r + x2r;
+ a[9] = x1i + x2i;
+ a[10] = x1r - x2r;
+ a[11] = x1i - x2i;
+ x1r = y4r + y6i;
+ x1i = y4i - y6r;
+ x0r = y5r + y7i;
+ x0i = y5i - y7r;
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ a[12] = x1r - x2i;
+ a[13] = x1i + x2r;
+ a[14] = x1r + x2i;
+ a[15] = x1i - x2r;
+ /* y8..y11 -> a[16..23]. */
+ x1r = y8r + y10r;
+ x1i = y8i + y10i;
+ x2r = y9r - y11r;
+ x2i = y9i - y11i;
+ a[16] = x1r + x2r;
+ a[17] = x1i + x2i;
+ a[18] = x1r - x2r;
+ a[19] = x1i - x2i;
+ x1r = y8r - y10r;
+ x1i = y8i - y10i;
+ x2r = y9r + y11r;
+ x2i = y9i + y11i;
+ a[20] = x1r - x2i;
+ a[21] = x1i + x2r;
+ a[22] = x1r + x2i;
+ a[23] = x1i - x2r;
+ /* y12..y15 -> a[24..31]. */
+ x1r = y12r - y14i;
+ x1i = y12i + y14r;
+ x0r = y13r + y15i;
+ x0i = y13i - y15r;
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ a[24] = x1r + x2r;
+ a[25] = x1i + x2i;
+ a[26] = x1r - x2r;
+ a[27] = x1i - x2i;
+ x1r = y12r + y14i;
+ x1i = y12i - y14r;
+ x0r = y13r - y15i;
+ x0i = y13i + y15r;
+ x2r = wn4r * (x0r - x0i);
+ x2i = wn4r * (x0i + x0r);
+ a[28] = x1r - x2i;
+ a[29] = x1i + x2r;
+ a[30] = x1r + x2i;
+ a[31] = x1i - x2r;
+}
+
+
+/*
+ * Fully unrolled in-place kernel over a[0..15], read and written as 8
+ * (a[2k], a[2k + 1]) pairs; the only coefficient used is w[1].
+ *
+ * All inputs are loaded into the y* temporaries before a[] is overwritten.
+ * NOTE(review): presumably the 8-point complex FFT leaf of the package --
+ * confirm against the upstream documentation.
+ */
+void cftf081(double *a, double *w)
+{
+ double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+ y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+
+ wn4r = w[1];
+ /* Pairs at a[0], a[4], a[8], a[12] -> y0..y3. */
+ x0r = a[0] + a[8];
+ x0i = a[1] + a[9];
+ x1r = a[0] - a[8];
+ x1i = a[1] - a[9];
+ x2r = a[4] + a[12];
+ x2i = a[5] + a[13];
+ x3r = a[4] - a[12];
+ x3i = a[5] - a[13];
+ y0r = x0r + x2r;
+ y0i = x0i + x2i;
+ y2r = x0r - x2r;
+ y2i = x0i - x2i;
+ y1r = x1r - x3i;
+ y1i = x1i + x3r;
+ y3r = x1r + x3i;
+ y3i = x1i - x3r;
+ /* Pairs at a[2], a[6], a[10], a[14] -> y4..y7 (y5, y7 scaled by wn4r). */
+ x0r = a[2] + a[10];
+ x0i = a[3] + a[11];
+ x1r = a[2] - a[10];
+ x1i = a[3] - a[11];
+ x2r = a[6] + a[14];
+ x2i = a[7] + a[15];
+ x3r = a[6] - a[14];
+ x3i = a[7] - a[15];
+ y4r = x0r + x2r;
+ y4i = x0i + x2i;
+ y6r = x0r - x2r;
+ y6i = x0i - x2i;
+ x0r = x1r - x3i;
+ x0i = x1i + x3r;
+ x2r = x1r + x3i;
+ x2i = x1i - x3r;
+ y5r = wn4r * (x0r - x0i);
+ y5i = wn4r * (x0r + x0i);
+ y7r = wn4r * (x2r - x2i);
+ y7i = wn4r * (x2r + x2i);
+ /* Final combination: (y1, y5), (y3, y7) -> a[8..15]; */
+ /* (y0, y4), (y2, y6) -> a[0..7]. */
+ a[8] = y1r + y5r;
+ a[9] = y1i + y5i;
+ a[10] = y1r - y5r;
+ a[11] = y1i - y5i;
+ a[12] = y3r - y7i;
+ a[13] = y3i + y7r;
+ a[14] = y3r + y7i;
+ a[15] = y3i - y7r;
+ a[0] = y0r + y4r;
+ a[1] = y0i + y4i;
+ a[2] = y0r - y4r;
+ a[3] = y0i - y4i;
+ a[4] = y2r - y6i;
+ a[5] = y2i + y6r;
+ a[6] = y2r + y6i;
+ a[7] = y2i - y6r;
+}
+
+
+/*
+ * Fully unrolled in-place kernel over a[0..15], read and written as 8
+ * (a[2k], a[2k + 1]) pairs; uses the coefficients w[1], w[2] and w[3].
+ *
+ * The first stage mixes the real part of one pair with the imaginary part
+ * of the pair 8 doubles further on (e.g. a[0] - a[9]). All inputs are
+ * loaded into the y* temporaries before a[] is overwritten.
+ * NOTE(review): presumably the twiddled 8-point leaf used for the second
+ * block in cftfx41 -- confirm against the upstream documentation.
+ */
+void cftf082(double *a, double *w)
+{
+ double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
+ y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+ y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+
+ wn4r = w[1];
+ wk1r = w[2];
+ wk1i = w[3];
+ /* Pairs at a[0], a[8] -> y0, y1. */
+ y0r = a[0] - a[9];
+ y0i = a[1] + a[8];
+ y1r = a[0] + a[9];
+ y1i = a[1] - a[8];
+ /* Pairs at a[4], a[12] -> y2, y3 (scaled by wn4r). */
+ x0r = a[4] - a[13];
+ x0i = a[5] + a[12];
+ y2r = wn4r * (x0r - x0i);
+ y2i = wn4r * (x0i + x0r);
+ x0r = a[4] + a[13];
+ x0i = a[5] - a[12];
+ y3r = wn4r * (x0r - x0i);
+ y3i = wn4r * (x0i + x0r);
+ /* Pairs at a[2], a[10] -> y4, y5 (multiplied by wk1). */
+ x0r = a[2] - a[11];
+ x0i = a[3] + a[10];
+ y4r = wk1r * x0r - wk1i * x0i;
+ y4i = wk1r * x0i + wk1i * x0r;
+ x0r = a[2] + a[11];
+ x0i = a[3] - a[10];
+ y5r = wk1i * x0r - wk1r * x0i;
+ y5i = wk1i * x0i + wk1r * x0r;
+ /* Pairs at a[6], a[14] -> y6, y7 (multiplied by wk1). */
+ x0r = a[6] - a[15];
+ x0i = a[7] + a[14];
+ y6r = wk1i * x0r - wk1r * x0i;
+ y6i = wk1i * x0i + wk1r * x0r;
+ x0r = a[6] + a[15];
+ x0i = a[7] - a[14];
+ y7r = wk1r * x0r - wk1i * x0i;
+ y7i = wk1r * x0i + wk1i * x0r;
+ /* Final combination: y0, y2, y4, y6 -> a[0..7]. */
+ x0r = y0r + y2r;
+ x0i = y0i + y2i;
+ x1r = y4r + y6r;
+ x1i = y4i + y6i;
+ a[0] = x0r + x1r;
+ a[1] = x0i + x1i;
+ a[2] = x0r - x1r;
+ a[3] = x0i - x1i;
+ x0r = y0r - y2r;
+ x0i = y0i - y2i;
+ x1r = y4r - y6r;
+ x1i = y4i - y6i;
+ a[4] = x0r - x1i;
+ a[5] = x0i + x1r;
+ a[6] = x0r + x1i;
+ a[7] = x0i - x1r;
+ /* y1, y3, y5, y7 -> a[8..15]. */
+ x0r = y1r - y3i;
+ x0i = y1i + y3r;
+ x1r = y5r - y7r;
+ x1i = y5i - y7i;
+ a[8] = x0r + x1r;
+ a[9] = x0i + x1i;
+ a[10] = x0r - x1r;
+ a[11] = x0i - x1i;
+ x0r = y1r + y3i;
+ x0i = y1i - y3r;
+ x1r = y5r + y7r;
+ x1i = y5i + y7i;
+ a[12] = x0r - x1i;
+ a[13] = x0i + x1r;
+ a[14] = x0r + x1i;
+ a[15] = x0i - x1r;
+}
+
+
+/*
+ * In-place butterfly on four (re, im) pairs stored in a[0..7].
+ *
+ * Sums and differences of pairs 0/2 and 1/3 are formed first, then
+ * recombined; the odd outputs (a[2..3], a[6..7]) cross the real and
+ * imaginary parts of the pair-1/3 difference. cftb040 is the same routine
+ * with the signs of those cross terms flipped.
+ */
+void cftf040(double *a)
+{
+    const double sum02_re = a[0] + a[4];
+    const double sum02_im = a[1] + a[5];
+    const double dif02_re = a[0] - a[4];
+    const double dif02_im = a[1] - a[5];
+    const double sum13_re = a[2] + a[6];
+    const double sum13_im = a[3] + a[7];
+    const double dif13_re = a[2] - a[6];
+    const double dif13_im = a[3] - a[7];
+
+    /* Even outputs: sum and difference of the two partial sums. */
+    a[0] = sum02_re + sum13_re;
+    a[1] = sum02_im + sum13_im;
+    a[4] = sum02_re - sum13_re;
+    a[5] = sum02_im - sum13_im;
+
+    /* Odd outputs: the two differences with re/im crossed. */
+    a[2] = dif02_re - dif13_im;
+    a[3] = dif02_im + dif13_re;
+    a[6] = dif02_re + dif13_im;
+    a[7] = dif02_im - dif13_re;
+}
+
+
+/*
+ * In-place butterfly on four (re, im) pairs stored in a[0..7].
+ *
+ * Identical to cftf040 except that the cross terms in the odd outputs
+ * (a[2..3], a[6..7]) carry the opposite sign.
+ */
+void cftb040(double *a)
+{
+    const double sum02_re = a[0] + a[4];
+    const double sum02_im = a[1] + a[5];
+    const double dif02_re = a[0] - a[4];
+    const double dif02_im = a[1] - a[5];
+    const double sum13_re = a[2] + a[6];
+    const double sum13_im = a[3] + a[7];
+    const double dif13_re = a[2] - a[6];
+    const double dif13_im = a[3] - a[7];
+
+    /* Even outputs: sum and difference of the two partial sums. */
+    a[0] = sum02_re + sum13_re;
+    a[1] = sum02_im + sum13_im;
+    a[4] = sum02_re - sum13_re;
+    a[5] = sum02_im - sum13_im;
+
+    /* Odd outputs: the two differences with re/im crossed. */
+    a[2] = dif02_re + dif13_im;
+    a[3] = dif02_im - dif13_re;
+    a[6] = dif02_re - dif13_im;
+    a[7] = dif02_im + dif13_re;
+}
+
+
+/*
+ * In-place butterfly on two (re, im) pairs stored in a[0..3]:
+ * the first pair becomes their sum, the second their difference.
+ */
+void cftx020(double *a)
+{
+    const double diff_re = a[0] - a[2];
+    const double diff_im = a[1] - a[3];
+
+    a[0] = a[0] + a[2];
+    a[1] = a[1] + a[3];
+    a[2] = diff_re;
+    a[3] = diff_im;
+}
+
+
+/*
+ * In-place correction pass over a[0..n-1] using the table c[0..nc].
+ *
+ * Walks pair j = 2, 4, ... up from the bottom together with its mirror
+ * pair k = n - j, for as long as j < n / 2, and adjusts both from a
+ * rotated difference of the two. The table is sampled with stride
+ * 2 * nc / (n / 2): c[kk] gives wki and 0.5 - c[nc - kk] gives wkr.
+ * rftbsub is the same routine with the sign of the wki terms flipped.
+ */
+void rftfsub(int n, double *a, int nc, double *c)
+{
+    const int half = n >> 1;
+    const int stride = 2 * nc / half;
+    int lo, hi, ci;
+
+    for (lo = 2, hi = n - 2, ci = stride; lo < half;
+         lo += 2, hi -= 2, ci += stride) {
+        const double wkr = 0.5 - c[nc - ci];
+        const double wki = c[ci];
+        const double xr = a[lo] - a[hi];
+        const double xi = a[lo + 1] + a[hi + 1];
+        const double yr = wkr * xr - wki * xi;
+        const double yi = wkr * xi + wki * xr;
+
+        a[lo] -= yr;
+        a[lo + 1] -= yi;
+        a[hi] += yr;
+        a[hi + 1] -= yi;
+    }
+}
+
+
+/*
+ * In-place correction pass over a[0..n-1] using the table c[0..nc].
+ *
+ * Same traversal and table sampling as rftfsub (pair j from the bottom,
+ * mirror pair k = n - j, while j < n / 2); only the sign of the wki terms
+ * in the rotation differs.
+ */
+void rftbsub(int n, double *a, int nc, double *c)
+{
+    const int half = n >> 1;
+    const int stride = 2 * nc / half;
+    int lo, hi, ci;
+
+    for (lo = 2, hi = n - 2, ci = stride; lo < half;
+         lo += 2, hi -= 2, ci += stride) {
+        const double wkr = 0.5 - c[nc - ci];
+        const double wki = c[ci];
+        const double xr = a[lo] - a[hi];
+        const double xi = a[lo + 1] + a[hi + 1];
+        const double yr = wkr * xr + wki * xi;
+        const double yi = wkr * xi - wki * xr;
+
+        a[lo] -= yr;
+        a[lo + 1] -= yi;
+        a[hi] += yr;
+        a[hi + 1] -= yi;
+    }
+}
+
+
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc
new file mode 100644
index 0000000..7223337
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Stack trace related stuff is from kaldi.
+ * Refer to
+ * https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-error.cc
+ */
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+#ifdef KNF_HAVE_EXECINFO_H
+#include <execinfo.h> // To get stack trace in error messages.
+#ifdef KNF_HAVE_CXXABI_H
+#include <cxxabi.h> // For name demangling.
+// Useful to decode the stack trace, but only used if we have execinfo.h
+#endif // KNF_HAVE_CXXABI_H
+#endif // KNF_HAVE_EXECINFO_H
+
+#include <stdlib.h>
+
+#include <ctime>
+#include <iomanip>
+#include <string>
+
+namespace knf {
+
+// Returns the current local time formatted as "yyyy-mm-dd hh:mm:ss".
+//
+// Returns an empty string if the current time cannot be converted to local
+// time (std::localtime reports that by returning a null pointer).
+std::string GetDateTimeStr() {
+  const std::time_t now = std::time(nullptr);
+  const std::tm *local = std::localtime(&now);
+  if (local == nullptr) {
+    // Dereferencing the null result would crash the logger itself.
+    return "";
+  }
+  // Copy right away: the pointed-to object may be shared static storage
+  // that a later localtime()/gmtime() call overwrites.
+  const std::tm tm = *local;
+
+  std::ostringstream os;
+  os << std::put_time(&tm, "%F %T");  // yyyy-mm-dd hh:mm:ss
+  return os.str();
+}
+
+// Locates a mangled symbol inside one line of backtrace_symbols() output.
+//
+// The symbol is taken to start at the first '_' (not at index 0) that is
+// directly preceded by ' ' or '(', and to end just before the next ' ' or
+// '+'. On success [*begin, *end) is that range and true is returned.
+// If no start is found, *begin is set to npos and *end is left untouched;
+// if a start is found but no terminator, *end is npos. Both return false.
+static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
+                              std::size_t *end) {
+  std::size_t pos = trace_name.find('_', 1);
+  while (pos != std::string::npos) {
+    const char prev = trace_name[pos - 1];
+    if (prev == ' ' || prev == '(') break;
+    pos = trace_name.find('_', pos + 1);
+  }
+
+  *begin = pos;
+  if (pos == std::string::npos) {
+    return false;
+  }
+
+  *end = trace_name.find_first_of(" +", pos);
+  return *end != std::string::npos;
+}
+
+#ifdef KNF_HAVE_EXECINFO_H
+// Returns `trace_name` (one line of backtrace_symbols() output) with the
+// mangled C++ symbol it contains replaced by its demangled form.
+//
+// The line is returned unchanged when KNF_HAVE_CXXABI_H is not defined or
+// when no symbol can be located in it. If a symbol is located but
+// abi::__cxa_demangle() fails, the mangled symbol is kept as is.
+static std::string Demangle(const std::string &trace_name) {
+#ifndef KNF_HAVE_CXXABI_H
+ return trace_name;
+#else // KNF_HAVE_CXXABI_H
+ // Try to demangle the symbol. We are trying to support the following
+ // formats produced by different platforms:
+ //
+ // Linux:
+ // ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
+ //
+ // Mac:
+ // 0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
+ //
+ // We want to extract the name e.g., '_ZN5kaldi13UnitTestErrorEv' and
+ // demangle it into a readable name like kaldi::UnitTestError.
+ std::size_t begin, end;
+ if (!LocateSymbolRange(trace_name, &begin, &end)) {
+ return trace_name;
+ }
+ std::string symbol = trace_name.substr(begin, end - begin);
+ int status;
+ // With null buffer/length arguments the result is malloc()-allocated and
+ // must be released with free() by the caller.
+ char *demangled_name = abi::__cxa_demangle(symbol.c_str(), 0, 0, &status);
+ if (status == 0 && demangled_name != nullptr) {
+ symbol = demangled_name;
+ free(demangled_name);
+ }
+ // Splice the (possibly demangled) symbol back between the untouched
+ // prefix and suffix of the original line.
+ return trace_name.substr(0, begin) + symbol +
+ trace_name.substr(end, std::string::npos);
+#endif // KNF_HAVE_CXXABI_H
+}
+#endif // KNF_HAVE_EXECINFO_H
+
+// Returns a human-readable stack trace of the calling thread.
+//
+// The result is empty when KNF_HAVE_EXECINFO_H is not defined or when
+// backtrace_symbols() fails. Otherwise it starts with a "[ Stack-Trace: ]"
+// header followed by one demangled frame per line. Traces longer than
+// kMaxTracePrint frames are abbreviated to their first and last
+// kMaxTracePrint / 2 frames, separated by an ellipsis.
+std::string GetStackTrace() {
+  std::string ans;
+#ifdef KNF_HAVE_EXECINFO_H
+  constexpr std::size_t kMaxTraceSize = 50;
+  constexpr std::size_t kMaxTracePrint = 50;  // Must be even.
+  constexpr std::size_t kHalfPrint = kMaxTracePrint / 2;
+  static const char kEllipsis[] = ".\n.\n.\n";
+
+  // Capture the raw return addresses, then resolve them to symbol strings.
+  void *frames[kMaxTraceSize];
+  const std::size_t num_frames = backtrace(frames, kMaxTraceSize);
+  char **symbols = backtrace_symbols(frames, num_frames);
+  if (symbols == nullptr) return ans;
+
+  // Appends the demangled frames [first, last) to ans, one per line.
+  const auto append_frames = [&ans, symbols](std::size_t first,
+                                             std::size_t last) {
+    for (std::size_t i = first; i < last; ++i) {
+      ans += Demangle(symbols[i]) + "\n";
+    }
+  };
+
+  ans += "[ Stack-Trace: ]\n";
+  if (num_frames <= kMaxTracePrint) {
+    append_frames(0, num_frames);
+  } else {
+    // Too many frames: keep only the head and the tail.
+    append_frames(0, kHalfPrint);
+    ans += kEllipsis;
+    append_frames(num_frames - kHalfPrint, num_frames);
+    if (num_frames == kMaxTraceSize) {
+      ans += kEllipsis;  // Stack was too long, probably a bug.
+    }
+  }
+
+  // backtrace_symbols() allocates the pointer array and the strings as one
+  // block, so only the array itself is freed.
+  free(symbols);
+#endif  // KNF_HAVE_EXECINFO_H
+  return ans;
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h
new file mode 100644
index 0000000..bd21cc3
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h
@@ -0,0 +1,383 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// https://github.com/k2-fsa/k2/blob/master/k2/csrc/log.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_LOG_H_
+#define KALDI_NATIVE_FBANK_CSRC_LOG_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cstdlib>
+#include <mutex> // NOLINT
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace knf {
+
+#if KNF_ENABLE_CHECK
+
+#if defined(NDEBUG)
+constexpr bool kDisableDebug = true;
+#else
+constexpr bool kDisableDebug = false;
+#endif
+
+// Severity of a log record, in increasing order.
+//
+// The numeric order matters: Logger prints a record only when the current
+// level compares less than or equal to the record's level.
+enum class LogLevel {
+ kTrace = 0,
+ kDebug = 1,
+ kInfo = 2,
+ kWarning = 3,
+ kError = 4,
+ kFatal = 5, // print message and abort the program
+};
+
+// They are used in KNF_LOG(xxx), so their names
+// do not follow the google c++ code style
+//
+// You can use them in the following way:
+//
+// KNF_LOG(TRACE) << "some message";
+// KNF_LOG(DEBUG) << "some message";
+#ifndef _MSC_VER
+constexpr LogLevel TRACE = LogLevel::kTrace;
+constexpr LogLevel DEBUG = LogLevel::kDebug;
+constexpr LogLevel INFO = LogLevel::kInfo;
+constexpr LogLevel WARNING = LogLevel::kWarning;
+constexpr LogLevel ERROR = LogLevel::kError;
+constexpr LogLevel FATAL = LogLevel::kFatal;
+#else
+#define TRACE LogLevel::kTrace
+#define DEBUG LogLevel::kDebug
+#define INFO LogLevel::kInfo
+#define WARNING LogLevel::kWarning
+#define ERROR LogLevel::kError
+#define FATAL LogLevel::kFatal
+#endif
+
+std::string GetStackTrace();
+
+/* Return the current log level.
+
+
+ If the current log level is TRACE, then all logged messages are printed out.
+
+ If the current log level is DEBUG, log messages with "TRACE" level are not
+ shown and all other levels are printed out.
+
+ Similarly, if the current log level is INFO, log message with "TRACE" and
+ "DEBUG" are not shown and all other levels are printed out.
+
+ If it is FATAL, then only FATAL messages are shown.
+ */
+// Returns the process-wide log level.
+//
+// The level defaults to INFO and is read exactly once, on the first call,
+// from the environment variable KNF_LOG_LEVEL (one of TRACE, DEBUG, INFO,
+// WARNING, ERROR, FATAL). An unrecognized value is reported on stderr and
+// the default is kept.
+inline LogLevel GetCurrentLogLevel() {
+  static LogLevel log_level = INFO;
+  static std::once_flag init_flag;
+  std::call_once(init_flag, []() {
+    const char *env_log_level = std::getenv("KNF_LOG_LEVEL");
+    if (env_log_level == nullptr) return;
+
+    std::string s = env_log_level;
+    if (s == "TRACE")
+      log_level = TRACE;
+    else if (s == "DEBUG")
+      log_level = DEBUG;
+    else if (s == "INFO")
+      log_level = INFO;
+    else if (s == "WARNING")
+      log_level = WARNING;
+    else if (s == "ERROR")
+      log_level = ERROR;
+    else if (s == "FATAL")
+      log_level = FATAL;
+    else
+      // Terminate the diagnostic with a newline so the next log record
+      // does not continue on the same line.
+      fprintf(stderr,
+              "Unknown KNF_LOG_LEVEL: %s"
+              "\nSupported values are: "
+              "TRACE, DEBUG, INFO, WARNING, ERROR, FATAL\n",
+              s.c_str());
+  });
+  return log_level;
+}
+
+// Returns true if the environment variable KNF_ABORT was set (to any value)
+// the first time this function was called; the answer is cached afterwards.
+inline bool EnableAbort() {
+  // A function-local static is initialized exactly once, on first use.
+  static const bool abort_requested = std::getenv("KNF_ABORT") != nullptr;
+  return abort_requested;
+}
+
+// Writes a single log record to stderr.
+//
+// The constructor prints the "[X] file:line:function " prefix, every
+// operator<< appends one value, and the destructor terminates the record
+// with a newline. Nothing is printed for a record whose level is below the
+// level returned by GetCurrentLogLevel().
+//
+// Destroying a FATAL record additionally prints a stack trace and then
+// either aborts (on Android, or when KNF_ABORT is set) or throws
+// std::runtime_error -- which is why the destructor is noexcept(false).
+class Logger {
+ public:
+  Logger(const char *filename, const char *func_name, uint32_t line_num,
+         LogLevel level)
+      : filename_(filename),
+        func_name_(func_name),
+        line_num_(line_num),
+        level_(level) {
+    cur_level_ = GetCurrentLogLevel();
+    if (cur_level_ > level_) return;  // this record is filtered out
+
+    const char *tag = "";
+    switch (level) {
+      case TRACE:
+        tag = "[T] ";
+        break;
+      case DEBUG:
+        tag = "[D] ";
+        break;
+      case INFO:
+        tag = "[I] ";
+        break;
+      case WARNING:
+        tag = "[W] ";
+        break;
+      case ERROR:
+        tag = "[E] ";
+        break;
+      case FATAL:
+        tag = "[F] ";
+        break;
+    }
+    fprintf(stderr, "%s%s:%u:%s ", tag, filename, line_num, func_name);
+  }
+
+  ~Logger() noexcept(false) {
+    static constexpr const char *kErrMsg = R"(
+ Some bad things happened. Please read the above error messages and stack
+ trace. If you are using Python, the following command may be helpful:
+
+ gdb --args python /path/to/your/code.py
+
+ (You can use `gdb` to debug the code. Please consider compiling
+ a debug version of KNF.).
+
+ If you are unable to fix it, please open an issue at:
+
+ https://github.com/csukuangfj/kaldi-native-fbank/issues/new
+ )";
+    // Only terminate records that were actually printed; otherwise every
+    // filtered-out record would still emit an empty line.
+    if (cur_level_ <= level_) fprintf(stderr, "\n");
+    if (level_ == FATAL) {
+      std::string stack_trace = GetStackTrace();
+      if (!stack_trace.empty()) {
+        fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
+      }
+
+      fflush(nullptr);
+
+#ifndef __ANDROID_API__
+      if (EnableAbort()) {
+        // NOTE: abort() will terminate the program immediately without
+        // printing the Python stack backtrace.
+        abort();
+      }
+
+      throw std::runtime_error(kErrMsg);
+#else
+      abort();
+#endif
+    }
+  }
+
+  const Logger &operator<<(bool b) const {
+    if (cur_level_ <= level_) {
+      fprintf(stderr, "%s", b ? "true" : "false");
+    }
+    return *this;
+  }
+
+  const Logger &operator<<(int8_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
+    return *this;
+  }
+
+  const Logger &operator<<(const char *s) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%s", s);
+    return *this;
+  }
+
+  const Logger &operator<<(int32_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
+    return *this;
+  }
+
+  const Logger &operator<<(uint32_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%u", i);
+    return *this;
+  }
+
+  const Logger &operator<<(uint64_t i) const {
+    if (cur_level_ <= level_)
+      fprintf(stderr, "%llu", (long long unsigned int)i);  // NOLINT
+    return *this;
+  }
+
+  const Logger &operator<<(int64_t i) const {
+    if (cur_level_ <= level_)
+      fprintf(stderr, "%lli", (long long int)i);  // NOLINT
+    return *this;
+  }
+
+  const Logger &operator<<(float f) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%f", f);
+    return *this;
+  }
+
+  const Logger &operator<<(double d) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%f", d);
+    return *this;
+  }
+
+  template <typename T>
+  const Logger &operator<<(const T &t) const {
+    // Skip the formatting work entirely when the record is filtered out.
+    if (cur_level_ > level_) return *this;
+    // require T overloads operator<<
+    std::ostringstream os;
+    os << t;
+    return *this << os.str().c_str();
+  }
+
+  // specialization to fix compile error: `stringstream << nullptr` is ambiguous
+  const Logger &operator<<(const std::nullptr_t &null) const {
+    if (cur_level_ <= level_) *this << "(null)";
+    return *this;
+  }
+
+ private:
+  const char *filename_;
+  const char *func_name_;
+  uint32_t line_num_;
+  LogLevel level_;      // level of this record
+  LogLevel cur_level_;  // process-wide level, sampled at construction
+};
+#endif // KNF_ENABLE_CHECK
+
+// Helper that lets the KNF_* macros be used as expressions of type void.
+//
+// With checks enabled, `Voidifier() & Logger(...)` swallows the logger so
+// that both arms of the macros' conditional expressions have type void.
+// With checks disabled, the macros expand to a bare Voidifier and the
+// operator<< below discards everything streamed into it.
+class Voidifier {
+ public:
+#if KNF_ENABLE_CHECK
+  void operator&(const Logger &) const {}
+#endif
+};
+// Use the same `#if KNF_ENABLE_CHECK` test as the Logger definition, so a
+// build with -DKNF_ENABLE_CHECK=0 gets the no-op stream operator instead of
+// neither operator.
+#if !KNF_ENABLE_CHECK
+template <typename T>
+const Voidifier &operator<<(const Voidifier &v, T &&) {
+  return v;
+}
+#endif
+
+} // namespace knf
+
+#define KNF_STATIC_ASSERT(x) static_assert(x, "")
+
+// Test the value of KNF_ENABLE_CHECK, exactly like the guard around the
+// Logger class does; with `#ifdef`, a build using -DKNF_ENABLE_CHECK=0 would
+// select the macros below although Logger is not defined.
+#if KNF_ENABLE_CHECK
+
+#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
+    defined(__PRETTY_FUNCTION__)
+// for clang and GCC
+#define KNF_FUNC __PRETTY_FUNCTION__
+#else
+// for other compilers
+#define KNF_FUNC __func__
+#endif
+
+#define KNF_CHECK(x)                                                \
+  (x) ? (void)0                                                     \
+      : ::knf::Voidifier() &                                        \
+            ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
+                << "Check failed: " << #x << " "
+
+// WARNING: x and y may be evaluated multiple times, but this happens only
+// when the check fails. Since the program aborts if it fails, we don't think
+// the extra evaluation of x and y matters.
+//
+// CAUTION: we recommend the following use case:
+//
+// auto x = Foo();
+// auto y = Bar();
+// KNF_CHECK_EQ(x, y) << "Some message";
+//
+// And please avoid
+//
+// KNF_CHECK_EQ(Foo(), Bar());
+//
+// if `Foo()` or `Bar()` causes some side effects, e.g., changing some
+// local static variables or global variables.
+#define _KNF_CHECK_OP(x, y, op)                                        \
+  ((x)op(y)) ? (void)0                                                 \
+             : ::knf::Voidifier() &                                    \
+                   ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
+                       << "Check failed: " << #x << " " << #op << " " << #y \
+                       << " (" << (x) << " vs. " << (y) << ") "
+
+#define KNF_CHECK_EQ(x, y) _KNF_CHECK_OP(x, y, ==)
+#define KNF_CHECK_NE(x, y) _KNF_CHECK_OP(x, y, !=)
+#define KNF_CHECK_LT(x, y) _KNF_CHECK_OP(x, y, <)
+#define KNF_CHECK_LE(x, y) _KNF_CHECK_OP(x, y, <=)
+#define KNF_CHECK_GT(x, y) _KNF_CHECK_OP(x, y, >)
+#define KNF_CHECK_GE(x, y) _KNF_CHECK_OP(x, y, >=)
+
+#define KNF_LOG(x) ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::x)
+
+// ------------------------------------------------------------
+// For debug check
+// ------------------------------------------------------------
+// If you define the macro "-D NDEBUG" while compiling kaldi-native-fbank,
+// the following macros are in fact empty and do nothing.
+
+#define KNF_DCHECK(x) ::knf::kDisableDebug ? (void)0 : KNF_CHECK(x)
+
+#define KNF_DCHECK_EQ(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_EQ(x, y)
+
+#define KNF_DCHECK_NE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_NE(x, y)
+
+#define KNF_DCHECK_LT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LT(x, y)
+
+#define KNF_DCHECK_LE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LE(x, y)
+
+#define KNF_DCHECK_GT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GT(x, y)
+
+#define KNF_DCHECK_GE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GE(x, y)
+
+#define KNF_DLOG(x) \
+  ::knf::kDisableDebug ? (void)0 : ::knf::Voidifier() & KNF_LOG(x)
+
+#else
+
+// Checks disabled: every macro expands to a Voidifier temporary; the
+// arguments are not evaluated.
+#define KNF_CHECK(x) ::knf::Voidifier()
+#define KNF_LOG(x) ::knf::Voidifier()
+
+#define KNF_CHECK_EQ(x, y) ::knf::Voidifier()
+#define KNF_CHECK_NE(x, y) ::knf::Voidifier()
+#define KNF_CHECK_LT(x, y) ::knf::Voidifier()
+#define KNF_CHECK_LE(x, y) ::knf::Voidifier()
+#define KNF_CHECK_GT(x, y) ::knf::Voidifier()
+#define KNF_CHECK_GE(x, y) ::knf::Voidifier()
+
+#define KNF_DCHECK(x) ::knf::Voidifier()
+#define KNF_DLOG(x) ::knf::Voidifier()
+#define KNF_DCHECK_EQ(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_NE(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_LT(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_LE(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_GT(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_GE(x, y) ::knf::Voidifier()
+
+#endif  // KNF_ENABLE_CHECK
+
+#endif // KALDI_NATIVE_FBANK_CSRC_LOG_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc
new file mode 100644
index 0000000..50c857f
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc
@@ -0,0 +1,257 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/mel-computations.cc
+
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+
+#include <algorithm>
+#include <sstream>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+namespace knf {
+
+// Writes the multi-line text produced by MelBanksOptions::ToString() to `os`.
+std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
+  return os << opts.ToString();
+}
+
+float MelBanks::VtlnWarpFreq(
+    float vtln_low_cutoff,   // lower VTLN cutoff; must be above low_freq
+    float vtln_high_cutoff,  // upper VTLN cutoff; must be below high_freq
+    float low_freq,          // lower frequency cutoff of the mel computation
+    float high_freq,         // upper frequency cutoff of the mel computation
+    float vtln_warp_factor, float freq) {
+  // Piecewise-linear VTLN warping function F(freq). It is not the same as
+  // HTK's, but it has similar inputs and the advantage of never producing
+  // empty bins.
+  //
+  // F is defined between low_freq and high_freq inclusive, and:
+  //   * F(low_freq) == low_freq
+  //   * F(high_freq) == high_freq
+  //   * F is continuous and piecewise linear, with inflection points at the
+  //     unwarped frequencies l (lower) and h (higher).
+  //   * If l <= f <= h, then F(f) = f / vtln_warp_factor.
+  //   * max(h, F(h)) == vtln_high_cutoff. Since F(h) == h / vtln_warp_factor,
+  //     h = vtln_high_cutoff / max(1, 1 / vtln_warp_factor)
+  //       = vtln_high_cutoff * min(1, vtln_warp_factor).
+  //   * min(l, F(l)) == vtln_low_cutoff, which implies that
+  //     l = vtln_low_cutoff / min(1, 1 / vtln_warp_factor)
+  //       = vtln_low_cutoff * max(1, vtln_warp_factor).
+  //
+  // Returns F(freq); a freq outside [low_freq, high_freq] is returned as is.
+
+  // Out-of-range frequencies are passed through untouched.
+  if (freq < low_freq || freq > high_freq) {
+    return freq;
+  }
+
+  KNF_CHECK_GT(vtln_low_cutoff, low_freq);
+  KNF_CHECK_LT(vtln_high_cutoff, high_freq);
+
+  const float unity = 1.0f;
+  const float l = vtln_low_cutoff * std::max(unity, vtln_warp_factor);
+  const float h = vtln_high_cutoff * std::min(unity, vtln_warp_factor);
+  const float center_slope = 1.0f / vtln_warp_factor;
+  const float warped_l = center_slope * l;  // F(l)
+  const float warped_h = center_slope * h;  // F(h)
+  KNF_CHECK(l > low_freq && h < high_freq);
+
+  // Slope of the left part of the 3-piece linear function.
+  const float left_slope = (warped_l - low_freq) / (l - low_freq);
+
+  // (The slope of the center part is just center_slope.)
+
+  // Slope of the right part of the 3-piece linear function.
+  const float right_slope = (high_freq - warped_h) / (high_freq - h);
+
+  if (freq < l) {
+    // Left part, anchored at (low_freq, low_freq).
+    return low_freq + left_slope * (freq - low_freq);
+  }
+  if (freq < h) {
+    // Center part: plain scaling.
+    return center_slope * freq;
+  }
+  // Right part (freq >= h), anchored at (high_freq, high_freq).
+  return high_freq + right_slope * (freq - high_freq);
+}
+
+float MelBanks::VtlnWarpMelFreq(
+    float vtln_low_cutoff, float vtln_high_cutoff, float low_freq,
+    float high_freq, float vtln_warp_factor, float mel_freq) {
+  // Leave the mel domain, warp the plain frequency, then map the result back.
+  const float freq = InverseMelScale(mel_freq);
+  const float warped = VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
+                                    high_freq, vtln_warp_factor, freq);
+  return MelScale(warped);
+}
+
+// Builds the triangular mel filters: bins_[b] receives the first FFT bin with
+// a nonzero weight and the weights of mel bin b. Bad options are reported via
+// KNF_LOG(FATAL); bin edges are VTLN-warped when vtln_warp_factor != 1.
+MelBanks::MelBanks(const MelBanksOptions &opts,
+ const FrameExtractionOptions &frame_opts,
+ float vtln_warp_factor)
+ : htk_mode_(opts.htk_mode) {
+ int32_t num_bins = opts.num_bins;
+ if (num_bins < 3) KNF_LOG(FATAL) << "Must have at least 3 mel bins";
+
+ float sample_freq = frame_opts.samp_freq;
+ int32_t window_length_padded = frame_opts.PaddedWindowSize();
+ KNF_CHECK_EQ(window_length_padded % 2, 0);
+
+ int32_t num_fft_bins = window_length_padded / 2;
+ float nyquist = 0.5f * sample_freq;
+
+ float low_freq = opts.low_freq, high_freq;
+ if (opts.high_freq > 0.0f)
+ high_freq = opts.high_freq;
+ else
+ high_freq = nyquist + opts.high_freq;
+
+ if (low_freq < 0.0f || low_freq >= nyquist || high_freq <= 0.0f ||
+ high_freq > nyquist || high_freq <= low_freq) {
+ KNF_LOG(FATAL) << "Bad values in options: low-freq " << low_freq
+ << " and high-freq " << high_freq << " vs. nyquist "
+ << nyquist;
+ }
+
+ float fft_bin_width = sample_freq / window_length_padded; // = nyquist / num_fft_bins
+
+ float mel_low_freq = MelScale(low_freq);
+ float mel_high_freq = MelScale(high_freq);
+
+ debug_ = opts.debug_mel;
+
+ // divide by num_bins+1 in next line because of end-effects where the bins
+ // spread out to the sides.
+ float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);
+
+ float vtln_low = opts.vtln_low, vtln_high = opts.vtln_high;
+ if (vtln_high < 0.0f) {
+ vtln_high += nyquist;
+ }
+
+ if (vtln_warp_factor != 1.0f &&
+ (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
+ vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) {
+ KNF_LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
+ << " and vtln-high " << vtln_high << ", versus "
+ << "low-freq " << low_freq << " and high-freq " << high_freq;
+ }
+
+ bins_.resize(num_bins);
+ center_freqs_.resize(num_bins);
+
+ for (int32_t bin = 0; bin < num_bins; ++bin) {
+ float left_mel = mel_low_freq + bin * mel_freq_delta,
+ center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
+ right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
+
+ if (vtln_warp_factor != 1.0f) {
+ left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+ vtln_warp_factor, left_mel);
+ center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+ vtln_warp_factor, center_mel);
+ right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+ vtln_warp_factor, right_mel);
+ }
+ center_freqs_[bin] = InverseMelScale(center_mel);
+
+ // this_bin holds one weight per FFT bin; nonzero only inside this mel bin.
+ std::vector<float> this_bin(num_fft_bins);
+
+ int32_t first_index = -1, last_index = -1;
+ for (int32_t i = 0; i < num_fft_bins; ++i) {
+ float freq = (fft_bin_width * i); // Center frequency of this fft bin.
+ float mel = MelScale(freq);
+ if (mel > left_mel && mel < right_mel) {
+ float weight;
+ if (mel <= center_mel)
+ weight = (mel - left_mel) / (center_mel - left_mel);
+ else
+ weight = (right_mel - mel) / (right_mel - center_mel);
+ this_bin[i] = weight;
+ if (first_index == -1) first_index = i;
+ last_index = i;
+ }
+ }
+ KNF_CHECK(first_index != -1 && last_index >= first_index &&
+ "You may have set num_mel_bins too large.");
+
+ bins_[bin].first = first_index;
+ int32_t size = last_index + 1 - first_index;
+ bins_[bin].second.insert(bins_[bin].second.end(),
+ this_bin.begin() + first_index,
+ this_bin.begin() + first_index + size);
+
+ // Replicate a bug in HTK, for testing purposes.
+ if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
+ bins_[bin].second[0] = 0.0;
+ }
+ } // end of the loop over mel bins
+
+ if (debug_) {
+ std::ostringstream os;
+ for (size_t i = 0; i < bins_.size(); i++) {
+ os << "bin " << i << ", offset = " << bins_[i].first << ", vec = ";
+ for (auto k : bins_[i].second) os << k << ", ";
+ os << "\n";
+ }
+ KNF_LOG(INFO) << os.str();
+ }
+}
+
+// "power_spectrum" contains fft energies.
+void MelBanks::Compute(const float *power_spectrum,
+                       float *mel_energies_out) const {
+  // Each mel energy is the dot product of a filter's weights with the slice
+  // of the power spectrum that starts at the filter's first FFT bin.
+  const int32_t num_bins = bins_.size();
+
+  for (int32_t bin = 0; bin < num_bins; ++bin) {
+    const float *spectrum = power_spectrum + bins_[bin].first;
+    const std::vector<float> &weights = bins_[bin].second;
+    float energy = 0;
+    for (size_t k = 0; k < weights.size(); ++k) {
+      energy += weights[k] * spectrum[k];
+    }
+
+    // HTK-like flooring, for testing purposes (we prefer dither).
+    if (htk_mode_ && energy < 1.0) energy = 1.0;
+
+    mel_energies_out[bin] = energy;
+
+    // Detects NaN early (added after a bug once seen with OpenBlas): NaN is
+    // the only value that compares unequal to itself.
+    KNF_CHECK_EQ(energy, energy);
+  }
+
+  if (!debug_) return;
+
+  fprintf(stderr, "MEL BANKS:\n");
+  for (int32_t bin = 0; bin < num_bins; ++bin) {
+    fprintf(stderr, " %f", mel_energies_out[bin]);
+  }
+  fprintf(stderr, "\n");
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h
new file mode 100644
index 0000000..c186cc2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h
@@ -0,0 +1,117 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// This file is copied/modified from kaldi/src/feat/mel-computations.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
+#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
+
+#include <cmath>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+namespace knf {
+
+struct MelBanksOptions {
+  int32_t num_bins = 25;  // number of triangular mel bins
+  float low_freq = 20;    // lower frequency cutoff
+
+  // Upper frequency cutoff: 0 means no cutoff, and a negative value is added
+  // to the Nyquist frequency to get the cutoff.
+  float high_freq = 0;
+
+  float vtln_low = 100;  // lower cutoff of the VTLN warping function
+  // Upper cutoff of the VTLN warping function: a negative value is added to
+  // the Nyquist frequency to get the cutoff.
+  float vtln_high = -500;
+
+  bool debug_mel = false;  // enables debug printing in MelBanks
+  // "Hidden" config that does not show up on the command line. It enables
+  // more exact compatibility with HTK, for testing purposes: it affects
+  // mel-energy flooring and reproduces a bug in HTK.
+  bool htk_mode = false;
+
+  // Renders every option as a "name: value" line, in declaration order.
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "num_bins: " << num_bins << "\n"
+       << "low_freq: " << low_freq << "\n"
+       << "high_freq: " << high_freq << "\n"
+       << "vtln_low: " << vtln_low << "\n"
+       << "vtln_high: " << vtln_high << "\n"
+       << "debug_mel: " << debug_mel << "\n"
+       << "htk_mode: " << htk_mode << "\n";
+    return os.str();
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);
+
+// Bank of triangular mel filters that turns FFT energies into mel energies.
+class MelBanks {
+ public:
+ static inline float InverseMelScale(float mel_freq) { // mel -> frequency
+ return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
+ }
+
+ static inline float MelScale(float freq) { // frequency -> mel
+ return 1127.0f * logf(1.0f + freq / 700.0f);
+ }
+
+ static float VtlnWarpFreq(
+ float vtln_low_cutoff,
+ float vtln_high_cutoff, // discontinuities in warp func
+ float low_freq,
+ float high_freq, // upper+lower frequency cutoffs in
+ // the mel computation
+ float vtln_warp_factor, float freq);
+
+ static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
+ float low_freq, float high_freq,
+ float vtln_warp_factor, float mel_freq);
+
+ // TODO(fangjun): Remove vtln_warp_factor
+ MelBanks(const MelBanksOptions &opts,
+ const FrameExtractionOptions &frame_opts, float vtln_warp_factor);
+
+ /// Compute Mel energies (note: not log energies).
+ /// At input, "fft_energies" contains the FFT energies (not log).
+ ///
+ /// @param fft_energies 1-D array with at least PaddedWindowSize()/2 entries
+ /// @param mel_energies_out 1-D array of size NumBins(), filled on return
+ void Compute(const float *fft_energies, float *mel_energies_out) const;
+
+ int32_t NumBins() const { return bins_.size(); }
+
+ private:
+ // center frequencies of bins, numbered from 0 ... num_bins-1.
+ // NOTE(review): no GetCenterFreqs() accessor is declared in this class.
+ std::vector<float> center_freqs_;
+
+ // One (index of the first nonzero fft-bin, vector of weights) pair per bin.
+ std::vector<std::pair<int32_t, std::vector<float>>> bins_;
+
+ // TODO(fangjun): Remove debug_ and htk_mode_
+ bool debug_;
+ bool htk_mode_;
+};
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc
new file mode 100644
index 0000000..833a6f0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc
@@ -0,0 +1,165 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.cc
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+RecyclingVector::RecyclingVector(int32_t items_to_hold)
+    : items_to_hold_(items_to_hold != 0 ? items_to_hold : -1),  // 0: no limit
+      first_available_index_(0) {}
+
+const float *RecyclingVector::At(int32_t index) const {
+  const bool already_recycled = index < first_available_index_;
+  if (already_recycled) {
+    KNF_LOG(FATAL) << "Attempted to retrieve feature vector that was "
+                      "already removed by the RecyclingVector (index = "
+                   << index << "; first_available_index = "
+                   << first_available_index_ << "; size = " << Size() << ")";
+  }
+  // Map the global frame index to a deque position; at() bounds-checks it.
+  const int32_t position = index - first_available_index_;
+  return items_.at(position).data();
+}
+
+void RecyclingVector::PushBack(std::vector<float> item) {
+  // A limit of -1 turns into the largest size_t, so it is never reached.
+  if (static_cast<size_t>(items_to_hold_) == items_.size()) {
+    items_.pop_front();  // recycle the oldest frame
+    first_available_index_ += 1;
+  }
+  items_.emplace_back(std::move(item));
+}
+
+int32_t RecyclingVector::Size() const {  // held frames + recycled frames
+  return static_cast<int32_t>(items_.size()) + first_available_index_;
+}
+
+template <class C>
+OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
+ const typename C::Options &opts)
+ : computer_(opts),
+ window_function_(computer_.GetFrameOptions()),
+ features_(opts.frame_opts.max_feature_vectors),
+ input_finished_(false),
+ waveform_offset_(0) {
+ // max_feature_vectors must be larger than 200; for the reason, search for
+ // ONLINE_IVECTOR_LIMIT in Kaldi's online-ivector-feature.cc.
+ // Casting to uint32_t, an unsigned type, means that -1 would be treated
+ // as `very large` and therefore passes the check.
+ KNF_CHECK(static_cast<uint32_t>(opts.frame_opts.max_feature_vectors) > 200);
+}
+
+template <class C>
+void OnlineGenericBaseFeature<C>::AcceptWaveform(float sampling_rate,
+                                                 const float *waveform,
+                                                 int32_t n) {
+  // An empty chunk is a no-op, even after InputFinished().
+  if (n == 0) return;
+
+  if (input_finished_) {
+    KNF_LOG(FATAL) << "AcceptWaveform called after InputFinished() was called.";
+  }
+
+  KNF_CHECK_EQ(sampling_rate, computer_.GetFrameOptions().samp_freq);
+
+  // Queue the new samples behind the leftover ones, then extract frames.
+  const float *chunk_end = waveform + n;
+  waveform_remainder_.insert(waveform_remainder_.end(), waveform, chunk_end);
+  ComputeFeatures();
+}
+
+template <class C>
+void OnlineGenericBaseFeature<C>::InputFinished() {
+ input_finished_ = true; // must be set before ComputeFeatures() reads it
+ ComputeFeatures(); // flush the frames that the end-of-input flag allows
+}
+
+// Computes every frame that the samples received so far allow, appends the
+// frames to features_, and then drops the samples no later frame can need.
+template <class C>
+void OnlineGenericBaseFeature<C>::ComputeFeatures() {
+  const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
+
+  // Samples seen so far = samples already discarded + samples still buffered.
+  int64_t num_samples_total = waveform_offset_ + waveform_remainder_.size();
+
+  int32_t num_frames_old = features_.Size();
+  int32_t num_frames_new =
+      NumFrames(num_samples_total, frame_opts, input_finished_);
+  KNF_CHECK_GE(num_frames_new, num_frames_old);
+
+  // Note: this online feature-extraction code does not support VTLN.
+  float vtln_warp = 1.0;
+
+  bool need_raw_log_energy = computer_.NeedRawLogEnergy();
+  std::vector<float> window;  // scratch buffer reused by every frame
+
+  for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
+    std::fill(window.begin(), window.end(), 0);
+
+    float raw_log_energy = 0.0;
+    float *raw_log_energy_out = need_raw_log_energy ? &raw_log_energy : nullptr;
+    ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts,
+                  window_function_, &window, raw_log_energy_out);
+
+    std::vector<float> this_feature(computer_.Dim());
+    computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature.data());
+    features_.PushBack(std::move(this_feature));
+  }
+
+  // Discard the portion of the signal that no future frame will need.
+  int64_t first_sample_of_next_frame =
+      FirstSampleOfFrame(num_frames_new, frame_opts);
+
+  int32_t samples_to_discard = first_sample_of_next_frame - waveform_offset_;
+  if (samples_to_discard <= 0) {
+    return;
+  }
+
+  int32_t new_num_samples =
+      static_cast<int32_t>(waveform_remainder_.size()) - samples_to_discard;
+
+  if (new_num_samples <= 0) {
+    // Odd, but we'll try to handle it: nothing buffered is worth keeping.
+    waveform_offset_ += waveform_remainder_.size();
+    waveform_remainder_.clear();
+    return;
+  }
+
+  // Keep only the tail of the buffer, in a freshly sized vector.
+  std::vector<float> new_remainder(
+      waveform_remainder_.begin() + samples_to_discard,
+      waveform_remainder_.end());
+  waveform_offset_ += samples_to_discard;
+  waveform_remainder_.swap(new_remainder);
+}
+
+template class OnlineGenericBaseFeature<FbankComputer>;
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h
new file mode 100644
index 0000000..5ca5511
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+
+#include <cstdint>
+#include <deque>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+namespace knf {
+
+/// This class serves as a storage for feature vectors with an option to limit
+/// the memory usage by removing old elements. The indices of deleted frames
+/// are "remembered" so that, regardless of the items_to_hold setting, the user
+/// always provides the indices as if no deletion was being performed.
+/// This is useful when processing very long recordings which would otherwise
+/// cause the memory to eventually blow up when the features are not being
+/// removed.
+class RecyclingVector {
+ public:
+ /// By default (-1) it does not remove any elements.
+ explicit RecyclingVector(int32_t items_to_hold = -1);
+
+ ~RecyclingVector() = default;
+ RecyclingVector(const RecyclingVector &) = delete;
+ RecyclingVector &operator=(const RecyclingVector &) = delete;
+
+ // The pointer is owned by RecyclingVector
+ // Users should not free it
+ const float *At(int32_t index) const;
+
+ void PushBack(std::vector<float> item);
+
+ /// This method returns the size as if no "recycling" had happened,
+ /// i.e. equivalent to the number of times the PushBack method has been
+ /// called.
+ int32_t Size() const;
+
+ private:
+ std::deque<std::vector<float>> items_; // feature vectors currently held
+ int32_t items_to_hold_; // maximum number of held vectors; -1 means no limit
+ int32_t first_available_index_; // number of vectors removed so far
+};
+
+/// This is a templated class for online (streaming) feature extraction;
+/// it's templated on a computer class that does the basic feature extraction.
+/// FbankComputer is the one used by the OnlineFbank alias below.
+template <class C>
+class OnlineGenericBaseFeature {
+ public:
+ // Constructor from options class
+ explicit OnlineGenericBaseFeature(const typename C::Options &opts);
+
+ int32_t Dim() const { return computer_.Dim(); } // as reported by the computer
+
+ float FrameShiftInSeconds() const {
+ return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
+ }
+
+ int32_t NumFramesReady() const { return features_.Size(); }
+
+ // Note: IsLastFrame() will only ever return true if you have called
+ // InputFinished() (and this frame is the last frame).
+ bool IsLastFrame(int32_t frame) const {
+ return input_finished_ && frame == NumFramesReady() - 1;
+ }
+
+ const float *GetFrame(int32_t frame) const { return features_.At(frame); }
+
+ // This would be called from the application, when you get
+ // more wave data. Note: the sampling_rate is only provided so
+ // the code can assert that it matches the sampling rate
+ // expected in the options.
+ //
+ // @param sampling_rate The sampling_rate of the input waveform
+ // @param waveform Pointer to a 1-D array of size n
+ // @param n Number of entries in waveform
+ void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+
+ // InputFinished() tells the class you won't be providing any
+ // more waveform. This will help flush out the last frame or two
+ // of features, in the case where snip-edges == false; it also
+ // affects the return value of IsLastFrame().
+ void InputFinished();
+
+ private:
+ // This function computes any additional feature frames that it is possible to
+ // compute from 'waveform_remainder_', which at this point may contain more
+ // than just a remainder-sized quantity (because AcceptWaveform() appends to
+ // waveform_remainder_ before calling this function). It adds these feature
+ // frames to features_, and shifts off any now-unneeded samples of input from
+ // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
+ void ComputeFeatures();
+
+ C computer_; // object that does the actual feature computation
+
+ FeatureWindowFunction window_function_;
+
+ // features_ holds the feature vectors that we have already computed;
+ // old ones may have been recycled, see RecyclingVector.
+
+ RecyclingVector features_;
+
+ // True if the user has called "InputFinished()"
+ bool input_finished_;
+
+ // waveform_offset_ is the number of samples of waveform that we have
+ // already discarded, i.e. that were prior to 'waveform_remainder_'.
+ int64_t waveform_offset_;
+
+ // waveform_remainder_ is a short piece of waveform that we may need to keep
+ // after extracting all the whole frames we can (whatever length of feature
+ // will be required for the next phase of computation).
+ // It is a 1-D array of samples.
+ std::vector<float> waveform_remainder_;
+};
+
+using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc
new file mode 100644
index 0000000..8cb4b84
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+// see fftsg.c
+#ifdef __cplusplus
+extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
+#else
+void rdft(int n, int isgn, double *a, int *ip, double *w);
+#endif
+
+namespace knf {
+class Rfft::RfftImpl {
+ public:
+  explicit RfftImpl(int32_t n) : n_(n), ip_(2 + std::sqrt(n / 2)), w_(n / 2) {
+    KNF_CHECK_EQ(n & (n - 1), 0);  // n must be a power of 2
+  }
+
+  // rdft() works on doubles: widen the input, transform, narrow the result.
+  void Compute(float *in_out) {
+    std::vector<double> buffer(n_);
+    std::copy(in_out, in_out + n_, buffer.begin());
+    Compute(buffer.data());
+    std::copy(buffer.begin(), buffer.end(), in_out);
+  }
+
+  void Compute(double *in_out) {
+    const int forward = 1;  // 1 means forward fft
+    rdft(n_, forward, in_out, ip_.data(), w_.data());
+  }
+
+ private:
+  int32_t n_;                // number of points of the transform
+  std::vector<int32_t> ip_;  // work area passed to rdft()
+  std::vector<double> w_;    // work area passed to rdft()
+};
+
+Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}
+
+Rfft::~Rfft() = default; // defined in this file, where RfftImpl is complete
+
+void Rfft::Compute(float *in_out) { impl_->Compute(in_out); } // forwards
+void Rfft::Compute(double *in_out) { impl_->Compute(in_out); } // forwards
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h
new file mode 100644
index 0000000..c8cb9f8
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
+#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_
+
+#include <memory>
+
+namespace knf {
+
+// n-point Real discrete Fourier transform
+// where n is a power of 2. n >= 2
+//
+// R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+// I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
+class Rfft {
+ public:
+ // @param n Number of input points of the transform. It should be a power of 2.
+ explicit Rfft(int32_t n);
+ ~Rfft();
+
+ /** @param in_out A 1-D array of size n; it is transformed in place.
+ * On return:
+ * in_out[0] = R[0]
+ * in_out[1] = R[n/2]
+ * for 0 < k < n/2,
+ * in_out[2*k] = R[k]
+ * in_out[2*k+1] = I[k]
+ *
+ */
+ void Compute(float *in_out);
+ void Compute(double *in_out);
+
+ private:
+ class RfftImpl; // defined in rfft.cc
+ std::unique_ptr<RfftImpl> impl_;
+};
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_CSRC_RFFT_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc
new file mode 100644
index 0000000..6379633
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+#if KNF_ENABLE_CHECK
+
+TEST(Log, TestLog) {
+  KNF_LOG(TRACE) << "this is a trace message";
+  KNF_LOG(DEBUG) << "this is a debug message";
+  KNF_LOG(INFO) << "this is an info message";
+  KNF_LOG(WARNING) << "this is a warning message";
+  KNF_LOG(ERROR) << "this is an error message";
+
+  ASSERT_THROW(KNF_LOG(FATAL) << "This will crash the program",
+               std::runtime_error);
+
+  // KNF_DLOG is the debug-build variant; each message names its own level.
+
+  KNF_DLOG(TRACE) << "this is a trace message for debug build";
+  KNF_DLOG(DEBUG) << "this is a debug message for debug build";
+  KNF_DLOG(INFO) << "this is an info message for debug build";
+  KNF_DLOG(ERROR) << "this is an error message for debug build";
+  KNF_DLOG(WARNING) << "this is a warning message for debug build";
+
+#if !defined(NDEBUG)
+  ASSERT_THROW(KNF_DLOG(FATAL) << "this is a fatal message for debug build",
+               std::runtime_error);
+#endif
+}
+
+TEST(Log, TestCheck) {
+  KNF_CHECK_EQ(1, 1) << "ok";
+  KNF_CHECK_LE(1, 3) << "ok";
+  KNF_CHECK_LT(1, 2) << "ok";
+  KNF_CHECK_GT(2, 1) << "ok";
+  KNF_CHECK_GE(2, 1) << "ok";
+
+  ASSERT_THROW(KNF_CHECK_EQ(2, 1) << "bad things happened", std::runtime_error);
+
+  // Debug-build variants; a failing one must throw when NDEBUG is not defined.
+  KNF_DCHECK_EQ(1, 1) << "ok";
+  KNF_DCHECK_LE(1, 3) << "ok";
+  KNF_DCHECK_LT(1, 2) << "ok";
+  KNF_DCHECK_GT(2, 1) << "ok";
+  KNF_DCHECK_GE(2, 1) << "ok";
+#if !defined(NDEBUG)
+  ASSERT_THROW(KNF_DCHECK_EQ(2, 1) << "bad things happened",
+               std::runtime_error);
+#endif
+}
+
+#endif
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc
new file mode 100644
index 0000000..9f595cf
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+int main() {
+  // Feeds 1600 synthetic samples, one at a time, into a 10-bin OnlineFbank
+  // with dithering disabled, then prints one feature frame per line.
+  knf::FbankOptions opts;
+  opts.frame_opts.dither = 0;
+  opts.mel_opts.num_bins = 10;
+
+  knf::OnlineFbank fbank(opts);
+  for (int32_t i = 0; i < 1600; ++i) {
+    float s = (i * i - i / 2) / 32767.;
+    fbank.AcceptWaveform(16000, &s, 1);
+  }
+
+  // Stream straight to std::cout: <iostream> is the only stream header this
+  // file includes directly, so std::ostringstream (<sstream>) is avoided.
+  int32_t n = fbank.NumFramesReady();
+  for (int32_t i = 0; i != n; ++i) {
+    const float *frame = fbank.GetFrame(i);
+    for (int32_t k = 0; k != opts.mel_opts.num_bins; ++k) {
+      std::cout << frame[k] << ", ";
+    }
+    std::cout << "\n";
+  }
+  std::cout << "\n";
+  return 0;
+}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc
new file mode 100644
index 0000000..bfbe621
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
+namespace knf {
+
+TEST(RecyclingVector, TestUnlimited) {
+  RecyclingVector v(-1);
+  constexpr int32_t N = 100;
+  for (int32_t i = 0; i != N; ++i) {
+    const float f = static_cast<float>(i);  // int -> float would narrow in {}
+    v.PushBack(std::unique_ptr<float[]>(new float[3]{f, f + 1, f + 2}));
+  }
+  ASSERT_EQ(v.Size(), N);
+  // Every pushed item must be readable back, unchanged, by its index.
+  for (int32_t i = 0; i != N; ++i) {
+    const float *t = v.At(i);
+    for (int32_t k = 0; k != 3; ++k) {
+      EXPECT_EQ(t[k], (i + k));
+    }
+  }
+}
+
+TEST(RecyclingVector, Testlimited) {
+  constexpr int32_t K = 3;
+  constexpr int32_t N = 10;
+  RecyclingVector v(K);
+  for (int32_t i = 0; i != N; ++i) {
+    const float f = static_cast<float>(i);  // int -> float would narrow in {}
+    v.PushBack(std::unique_ptr<float[]>(new float[3]{f, f + 1, f + 2}));
+  }
+  // Size() is expected to equal N even though v was constructed with K < N.
+  ASSERT_EQ(v.Size(), N);
+
+  // Only the K most recently pushed items are read back.
+  for (int32_t i = N - K; i != N; ++i) {
+    const float *t = v.At(i);
+    for (int32_t k = 0; k != 3; ++k) {
+      EXPECT_EQ(t[k], (i + k));
+    }
+  }
+}
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc
new file mode 100644
index 0000000..47f9904
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+#if 0
+>>> import torch
+>>> a = torch.tensor([1., -1, 3, 8, 20, 6, 0, 2])
+>>> torch.fft.rfft(a)
+tensor([ 39.0000+0.0000j, -28.1924-2.2929j, 18.0000+5.0000j, -9.8076+3.7071j,
+ 9.0000+0.0000j])
+#endif
+
+TEST(Rfft, TestRfft) {
+ knf::Rfft fft(8);
+ for (int32_t i = 0; i != 10; ++i) {
+ std::vector<float> d = {1, -1, 3, 8, 20, 6, 0, 2};
+ fft.Compute(d.data());
+ // In-place result: d[0] and d[1] match the real parts of bins 0 and 4.
+ EXPECT_EQ(d[0], 39);
+ EXPECT_EQ(d[1], 9);
+ // Bin 1: d[2] is the real part; d[3] is the imaginary part negated.
+ EXPECT_NEAR(d[2], -28.1924, 1e-3);
+ EXPECT_NEAR(-d[3], -2.2929, 1e-3);
+ // Bin 2, same layout.
+ EXPECT_NEAR(d[4], 18, 1e-3);
+ EXPECT_NEAR(-d[5], 5, 1e-3);
+ // Bin 3, same layout.
+ EXPECT_NEAR(d[6], -9.8076, 1e-3);
+ EXPECT_NEAR(-d[7], 3.7071, 1e-3);
+ }
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt
new file mode 100644
index 0000000..60d6382
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(csrc)
+add_subdirectory(tests)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt
new file mode 100644
index 0000000..16bee54
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt
@@ -0,0 +1,28 @@
+pybind11_add_module(_kaldi_native_fbank
+  feature-fbank.cc
+  feature-window.cc
+  kaldi-native-fbank.cc
+  mel-computations.cc
+  online-feature.cc
+  utils.cc
+)
+# macOS: put site-packages on the rpath (sysconfig; distutils is gone in 3.12).
+if(APPLE)
+  execute_process(
+    COMMAND "${PYTHON_EXECUTABLE}" -c "import sysconfig; print(sysconfig.get_path('purelib'))"
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
+  )
+  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
+  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
+endif()
+# Non-Windows: also add <kaldi_native_fbank_rpath_origin>/kaldi_native_fbank/lib.
+if(NOT WIN32)
+  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${kaldi_native_fbank_rpath_origin}/kaldi_native_fbank/lib")
+endif()
+
+target_link_libraries(_kaldi_native_fbank PRIVATE kaldi-native-fbank-core)
+
+install(TARGETS _kaldi_native_fbank
+  DESTINATION ../
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc
new file mode 100644
index 0000000..4f32895
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
+
+#include <memory>
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindFbankOptions(py::module &m) { // NOLINT
+ using PyClass = FbankOptions;
+ py::class_<PyClass>(m, "FbankOptions")
+ .def(py::init<>())
+ .def_readwrite("frame_opts", &PyClass::frame_opts)
+ .def_readwrite("mel_opts", &PyClass::mel_opts)
+ .def_readwrite("use_energy", &PyClass::use_energy)
+ .def_readwrite("energy_floor", &PyClass::energy_floor)
+ .def_readwrite("raw_energy", &PyClass::raw_energy)
+ .def_readwrite("htk_compat", &PyClass::htk_compat)
+ .def_readwrite("use_log_fbank", &PyClass::use_log_fbank)
+ .def_readwrite("use_power", &PyClass::use_power)
+ .def("__str__",
+ [](const PyClass &self) -> std::string { return self.ToString(); })
+ .def("as_dict",
+ [](const PyClass &self) -> py::dict { return AsDict(self); })
+ .def_static(
+ "from_dict",
+ [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
+ .def(py::pickle(
+ [](const PyClass &self) -> py::dict { return AsDict(self); },
+ [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
+}
+
+void PybindFeatureFbank(py::module &m) { // NOLINT
+ PybindFbankOptions(m);
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h
new file mode 100644
index 0000000..6490c22
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindFeatureFbank(py::module &m); // NOLINT
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc
new file mode 100644
index 0000000..1f3b0a5
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/feature-window.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindFrameExtractionOptions(py::module &m) { // NOLINT
+ using PyClass = FrameExtractionOptions;
+ py::class_<PyClass>(m, "FrameExtractionOptions")
+ .def(py::init<>())
+ .def_readwrite("samp_freq", &PyClass::samp_freq)
+ .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
+ .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
+ .def_readwrite("dither", &PyClass::dither)
+ .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
+ .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
+ .def_readwrite("window_type", &PyClass::window_type)
+ .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
+ .def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
+ .def_readwrite("snip_edges", &PyClass::snip_edges)
+ .def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
+ .def("as_dict",
+ [](const PyClass &self) -> py::dict { return AsDict(self); })
+ .def_static("from_dict",
+ [](py::dict dict) -> PyClass {
+ return FrameExtractionOptionsFromDict(dict);
+ })
+#if 0
+ .def_readwrite("allow_downsample",
+ &PyClass::allow_downsample)
+ .def_readwrite("allow_upsample", &PyClass::allow_upsample)
+#endif
+ .def("__str__",
+ [](const PyClass &self) -> std::string { return self.ToString(); })
+ .def(py::pickle(
+ [](const PyClass &self) -> py::dict { return AsDict(self); },
+ [](py::dict dict) -> PyClass {
+ return FrameExtractionOptionsFromDict(dict);
+ }));
+}
+
+void PybindFeatureWindow(py::module &m) { // NOLINT
+ PybindFrameExtractionOptions(m);
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h
new file mode 100644
index 0000000..aba5b9e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindFeatureWindow(py::module &m); // NOLINT
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
new file mode 100644
index 0000000..47a2d8e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/python/csrc/feature-window.h"
+#include "kaldi-native-fbank/python/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/online-feature.h"
+
+namespace knf {
+
+PYBIND11_MODULE(_kaldi_native_fbank, m) {
+ m.doc() = "Python wrapper for kaldi native fbank";
+ PybindFeatureWindow(m);
+ PybindMelComputations(m);
+ PybindFeatureFbank(m);
+
+ PybindOnlineFeature(m);
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
new file mode 100644
index 0000000..756f4ce
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
+
+#include "pybind11/numpy.h"
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+namespace py = pybind11;
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc
new file mode 100644
index 0000000..2970b47
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc
@@ -0,0 +1,58 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/mel-computations.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindMelBanksOptions(py::module &m) { // NOLINT
+ using PyClass = MelBanksOptions;
+ py::class_<PyClass>(m, "MelBanksOptions")
+ .def(py::init<>())
+ .def_readwrite("num_bins", &PyClass::num_bins)
+ .def_readwrite("low_freq", &PyClass::low_freq)
+ .def_readwrite("high_freq", &PyClass::high_freq)
+ .def_readwrite("vtln_low", &PyClass::vtln_low)
+ .def_readwrite("vtln_high", &PyClass::vtln_high)
+ .def_readwrite("debug_mel", &PyClass::debug_mel)
+ .def_readwrite("htk_mode", &PyClass::htk_mode)
+ .def("__str__",
+ [](const PyClass &self) -> std::string { return self.ToString(); })
+ .def("as_dict",
+ [](const PyClass &self) -> py::dict { return AsDict(self); })
+ .def_static("from_dict",
+ [](py::dict dict) -> PyClass {
+ return MelBanksOptionsFromDict(dict);
+ })
+ .def(py::pickle(
+ [](const PyClass &self) -> py::dict { return AsDict(self); },
+ [](py::dict dict) -> PyClass {
+ return MelBanksOptionsFromDict(dict);
+ }));
+}
+
+void PybindMelComputations(py::module &m) { // NOLINT
+ PybindMelBanksOptions(m);
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h
new file mode 100644
index 0000000..2ca9ac7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindMelComputations(py::module &m); // NOLINT
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc
new file mode 100644
index 0000000..46296d1
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc
@@ -0,0 +1,68 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/online-feature.h"
+
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+namespace knf {
+
+template <typename C>
+void PybindOnlineFeatureTpl(py::module &m, // NOLINT
+ const std::string &class_name,
+ const std::string &class_help_doc = "") {
+ using PyClass = OnlineGenericBaseFeature<C>; // C++ type exposed as `class_name`
+ using Options = typename C::Options;
+ py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
+ .def(py::init<const Options &>(), py::arg("opts"))
+ .def_property_readonly("dim", &PyClass::Dim)
+ .def_property_readonly("frame_shift_in_seconds",
+ &PyClass::FrameShiftInSeconds)
+ .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
+ .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
+ .def(
+ "get_frame",
+ [](py::object obj, int32_t frame) {
+ auto *self = obj.cast<PyClass *>();
+ const float *f = self->GetFrame(frame); // NOTE(review): confirm `f` stays valid while the array is in use
+ return py::array_t<float>({self->Dim()}, // shape
+ {sizeof(float)}, // stride in bytes
+ f, // ptr
+ obj); // base object: the returned array holds
+ // a reference to `obj` (the feature object)
+ },
+ py::arg("frame"))
+ .def(
+ "accept_waveform",
+ [](PyClass &self, float sampling_rate,
+ const std::vector<float> &waveform) {
+ self.AcceptWaveform(sampling_rate, waveform.data(),
+ waveform.size());
+ },
+ py::arg("sampling_rate"), py::arg("waveform"),
+ py::call_guard<py::gil_scoped_release>()) // GIL is released for the call
+ .def("input_finished", &PyClass::InputFinished);
+}
+
+void PybindOnlineFeature(py::module &m) { // NOLINT
+ PybindOnlineFeatureTpl<FbankComputer>(m, "OnlineFbank");
+}
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h
new file mode 100644
index 0000000..b4a05df
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindOnlineFeature(py::module &m); // NOLINT
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc
new file mode 100644
index 0000000..dc9d236
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc
@@ -0,0 +1,136 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+#define FROM_DICT(type, key) \
+ if (dict.contains(#key)) { \
+ opts.key = py::type(dict[#key]); \
+ }
+
+#define AS_DICT(key) dict[#key] = opts.key
+
+namespace knf {
+
+FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
+ FrameExtractionOptions opts;
+
+ FROM_DICT(float_, samp_freq);
+ FROM_DICT(float_, frame_shift_ms);
+ FROM_DICT(float_, frame_length_ms);
+ FROM_DICT(float_, dither);
+ FROM_DICT(float_, preemph_coeff);
+ FROM_DICT(bool_, remove_dc_offset);
+ FROM_DICT(str, window_type);
+ FROM_DICT(bool_, round_to_power_of_two);
+ FROM_DICT(float_, blackman_coeff);
+ FROM_DICT(bool_, snip_edges);
+ FROM_DICT(int_, max_feature_vectors);
+
+ return opts;
+}
+
+py::dict AsDict(const FrameExtractionOptions &opts) {
+ py::dict dict;
+
+ AS_DICT(samp_freq);
+ AS_DICT(frame_shift_ms);
+ AS_DICT(frame_length_ms);
+ AS_DICT(dither);
+ AS_DICT(preemph_coeff);
+ AS_DICT(remove_dc_offset);
+ AS_DICT(window_type);
+ AS_DICT(round_to_power_of_two);
+ AS_DICT(blackman_coeff);
+ AS_DICT(snip_edges);
+ AS_DICT(max_feature_vectors);
+
+ return dict;
+}
+
+MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
+ MelBanksOptions opts;
+
+ FROM_DICT(int_, num_bins);
+ FROM_DICT(float_, low_freq);
+ FROM_DICT(float_, high_freq);
+ FROM_DICT(float_, vtln_low);
+ FROM_DICT(float_, vtln_high);
+ FROM_DICT(bool_, debug_mel);
+ FROM_DICT(bool_, htk_mode);
+
+ return opts;
+}
+py::dict AsDict(const MelBanksOptions &opts) {
+ py::dict dict;
+
+ AS_DICT(num_bins);
+ AS_DICT(low_freq);
+ AS_DICT(high_freq);
+ AS_DICT(vtln_low);
+ AS_DICT(vtln_high);
+ AS_DICT(debug_mel);
+ AS_DICT(htk_mode);
+
+ return dict;
+}
+
+FbankOptions FbankOptionsFromDict(py::dict dict) {
+ FbankOptions opts;
+ // Nested option groups are rebuilt from their own sub-dicts when present.
+ if (dict.contains("frame_opts")) {
+ opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
+ }
+
+ if (dict.contains("mel_opts")) {
+ opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
+ }
+ // Scalar fields: FROM_DICT assigns opts.<key> only when the key is present.
+ FROM_DICT(bool_, use_energy);
+ FROM_DICT(float_, energy_floor);
+ FROM_DICT(bool_, raw_energy);
+ FROM_DICT(bool_, htk_compat);
+ FROM_DICT(bool_, use_log_fbank);
+ FROM_DICT(bool_, use_power);
+
+ return opts;
+}
+
+py::dict AsDict(const FbankOptions &opts) {
+ py::dict dict;
+
+ dict["frame_opts"] = AsDict(opts.frame_opts);
+ dict["mel_opts"] = AsDict(opts.mel_opts);
+ AS_DICT(use_energy);
+ AS_DICT(energy_floor);
+ AS_DICT(raw_energy);
+ AS_DICT(htk_compat);
+ AS_DICT(use_log_fbank);
+ AS_DICT(use_power);
+
+ return dict;
+}
+
+#undef FROM_DICT
+#undef AS_DICT
+
+} // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h
new file mode 100644
index 0000000..bb78165
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+/*
+ * This file contains code about `from_dict` and
+ * `as_dict` for various options in kaldi-native-fbank.
+ *
+ * Regarding `from_dict`, users don't need to provide
+ * all the fields in the options. If some fields
+ * are not provided, it just uses the default one.
+ *
+ * If the provided dict in `from_dict` is empty,
+ * all fields use their default values.
+ */
+
+namespace knf {
+
+FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
+py::dict AsDict(const FrameExtractionOptions &opts);
+
+MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
+py::dict AsDict(const MelBanksOptions &opts);
+
+FbankOptions FbankOptionsFromDict(py::dict dict);
+py::dict AsDict(const FbankOptions &opts);
+
+} // namespace knf
+
+#endif // KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
new file mode 100644
index 0000000..598f022
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
@@ -0,0 +1,6 @@
+from _kaldi_native_fbank import (
+ FrameExtractionOptions,
+ MelBanksOptions,
+ OnlineFbank,
+ FbankOptions,
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt
new file mode 100644
index 0000000..2c02a84
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt
@@ -0,0 +1,31 @@
+function(kaldi_native_fbank_add_py_test source)
+  # Register `source` (a script in this directory) as the CTest test "<basename>_py".
+  get_filename_component(name "${source}" NAME_WE)
+  set(name "${name}_py")
+  message(STATUS "source: ${source}")
+
+  add_test(NAME "${name}"
+    COMMAND
+      "${PYTHON_EXECUTABLE}"
+      "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
+  )
+
+  # PYTHONPATH (POSIX ':' separator): Python package dir + built extension dir.
+  get_filename_component(kaldi_native_fbank_path "${CMAKE_CURRENT_LIST_DIR}" DIRECTORY)
+  set_property(TEST "${name}"
+    PROPERTY ENVIRONMENT "PYTHONPATH=${kaldi_native_fbank_path}:$<TARGET_FILE_DIR:_kaldi_native_fbank>:$ENV{PYTHONPATH}"
+  )
+endfunction()
+
+# please sort the files in alphabetic order
+set(py_test_files
+  test_fbank_options.py
+  test_frame_extraction_options.py
+  test_mel_bank_options.py
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  foreach(source IN LISTS py_test_files)
+    kaldi_native_fbank_add_py_test("${source}")
+  endforeach()
+endif()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py
new file mode 100755
index 0000000..d468912
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
+
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():
+    """A default-constructed FbankOptions carries the expected default values."""
+    fbank = knf.FbankOptions()
+    frame, mel = fbank.frame_opts, fbank.mel_opts
+    assert frame.samp_freq == 16000
+    assert frame.frame_shift_ms == 10.0
+    assert frame.frame_length_ms == 25.0
+    assert frame.dither == 1.0
+    assert abs(frame.preemph_coeff - 0.97) < 1e-6
+    assert frame.remove_dc_offset is True
+    assert frame.window_type == "povey"
+    assert frame.round_to_power_of_two is True
+    assert abs(frame.blackman_coeff - 0.42) < 1e-6
+    assert frame.snip_edges is True
+    assert mel.num_bins == 23
+    assert mel.low_freq == 20
+    assert mel.high_freq == 0
+    assert mel.vtln_low == 100
+    assert mel.vtln_high == -500
+    assert mel.debug_mel is False
+    assert mel.htk_mode is False
+    assert fbank.use_energy is False
+    assert fbank.energy_floor == 0.0
+    assert fbank.raw_energy is True
+    assert fbank.htk_compat is False
+    assert fbank.use_log_fbank is True
+    assert fbank.use_power is True
+
+
+def test_set_get():
+    """Each top-level FbankOptions field can be assigned and then read back."""
+    fbank = knf.FbankOptions()
+    updates = [
+        ("use_energy", True),
+        ("energy_floor", 1),
+        ("raw_energy", False),
+        ("htk_compat", True),
+        ("use_log_fbank", False),
+        ("use_power", False),
+    ]
+
+    for field, value in updates:
+        setattr(fbank, field, value)
+        got = getattr(fbank, field)
+        if isinstance(value, bool):
+            assert got is value  # must be a real bool, not merely truthy/falsy
+        else:
+            assert got == value
+
+
+def test_set_get_frame_opts():
+    """Fields of the nested frame_opts can be assigned through FbankOptions and read back."""
+    fbank = knf.FbankOptions()
+    fbank.frame_opts.samp_freq = 44100
+    assert fbank.frame_opts.samp_freq == 44100
+
+    fbank.frame_opts.frame_shift_ms = 20.5
+    assert fbank.frame_opts.frame_shift_ms == 20.5
+
+    fbank.frame_opts.frame_length_ms = 1
+    assert fbank.frame_opts.frame_length_ms == 1
+
+    fbank.frame_opts.dither = 0.5
+    assert fbank.frame_opts.dither == 0.5
+
+    fbank.frame_opts.preemph_coeff = 0.25
+    assert fbank.frame_opts.preemph_coeff == 0.25
+
+    fbank.frame_opts.remove_dc_offset = False
+    assert fbank.frame_opts.remove_dc_offset is False
+
+    fbank.frame_opts.window_type = "hanning"
+    assert fbank.frame_opts.window_type == "hanning"
+
+    fbank.frame_opts.round_to_power_of_two = False
+    assert fbank.frame_opts.round_to_power_of_two is False
+
+    fbank.frame_opts.blackman_coeff = 0.25
+    assert fbank.frame_opts.blackman_coeff == 0.25
+
+    fbank.frame_opts.snip_edges = False
+    assert fbank.frame_opts.snip_edges is False
+
+
+def test_set_get_mel_opts():
+    """Fields of the nested mel_opts can be assigned through FbankOptions and read back."""
+    fbank = knf.FbankOptions()
+    fbank.mel_opts.num_bins = 100
+    assert fbank.mel_opts.num_bins == 100
+
+    fbank.mel_opts.low_freq = 22
+    assert fbank.mel_opts.low_freq == 22
+
+    fbank.mel_opts.high_freq = 1
+    assert fbank.mel_opts.high_freq == 1
+
+    fbank.mel_opts.vtln_low = 101
+    assert fbank.mel_opts.vtln_low == 101
+
+    fbank.mel_opts.vtln_high = -100
+    assert fbank.mel_opts.vtln_high == -100
+
+    fbank.mel_opts.debug_mel = True
+    assert fbank.mel_opts.debug_mel is True
+
+    fbank.mel_opts.htk_mode = True
+    assert fbank.mel_opts.htk_mode is True
+
+
+def test_from_empty_dict():
+    """from_dict({}) yields options whose str() equals that of default-constructed ones."""
+    from_empty = knf.FbankOptions.from_dict({})
+    default = knf.FbankOptions()
+    assert str(from_empty) == str(default)
+
+
+def test_from_dict_partial():
+    """from_dict accepts a dict that names only some fields, including nested ones."""
+    partial = {
+        "energy_floor": 10.5,
+        "htk_compat": True,
+        "mel_opts": {"num_bins": 80, "vtln_low": 1},
+        "frame_opts": {"window_type": "hanning"},
+    }
+    fbank = knf.FbankOptions.from_dict(partial)
+    assert fbank.energy_floor == 10.5
+    assert fbank.htk_compat is True
+    assert fbank.mel_opts.num_bins == 80
+    assert fbank.mel_opts.vtln_low == 1
+    assert fbank.frame_opts.window_type == "hanning"
+    mel = knf.MelBanksOptions.from_dict(partial["mel_opts"])
+    assert str(fbank.mel_opts) == str(mel)
+
+
+def test_from_dict_full_and_as_dict():
+    """as_dict() nests the sub-options and its output round-trips through from_dict()."""
+    fbank = knf.FbankOptions()
+    fbank.htk_compat = True
+    fbank.mel_opts.num_bins = 80
+    fbank.frame_opts.samp_freq = 10
+    as_dict = fbank.as_dict()
+    assert as_dict["htk_compat"] is True
+    assert as_dict["mel_opts"]["num_bins"] == 80
+    assert as_dict["frame_opts"]["samp_freq"] == 10
+
+    mel = knf.MelBanksOptions()
+    mel.num_bins = 80
+    assert as_dict["mel_opts"] == mel.as_dict()
+
+    frame = knf.FrameExtractionOptions()
+    frame.samp_freq = 10
+    assert as_dict["frame_opts"] == frame.as_dict()
+
+    restored = knf.FbankOptions.from_dict(as_dict)
+    assert str(restored) == str(fbank)
+
+    as_dict["htk_compat"] = False
+    flipped = knf.FbankOptions.from_dict(as_dict)
+    assert flipped.htk_compat is False
+
+
+def test_pickle():
+    """A pickled FbankOptions unpickles to an object with the same str()."""
+    fbank = knf.FbankOptions()
+    fbank.use_energy = True
+    fbank.use_power = False
+
+    fbank.frame_opts.samp_freq = 44100
+    fbank.mel_opts.num_bins = 100
+
+    payload = pickle.dumps(fbank)
+    restored = pickle.loads(payload)
+    assert str(fbank) == str(restored)
+
+
+def main():
+    """Run every test in this file, in definition order."""
+    for run_test in (
+        test_default, test_set_get,
+        test_set_get_frame_opts, test_set_get_mel_opts,
+        test_from_empty_dict, test_from_dict_partial,
+        test_from_dict_full_and_as_dict, test_pickle,
+    ):
+        run_test()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
new file mode 100755
index 0000000..2b16efe
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang)
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():  # defaults of a freshly constructed FrameExtractionOptions
+    frame = knf.FrameExtractionOptions()
+    assert frame.samp_freq == 16000
+    assert frame.frame_shift_ms == 10.0
+    assert frame.frame_length_ms == 25.0
+    assert frame.dither == 1.0
+    assert abs(frame.preemph_coeff - 0.97) < 1e-6
+    assert frame.remove_dc_offset is True
+    assert frame.window_type == "povey"
+    assert frame.round_to_power_of_two is True
+    assert abs(frame.blackman_coeff - 0.42) < 1e-6
+    assert frame.snip_edges is True
+
+
+def test_set_get():
+    """Each FrameExtractionOptions field can be assigned and then read back."""
+    frame = knf.FrameExtractionOptions()
+    frame.samp_freq = 44100
+    assert frame.samp_freq == 44100
+
+    frame.frame_shift_ms = 20.5
+    assert frame.frame_shift_ms == 20.5
+    frame.frame_length_ms = 1
+    assert frame.frame_length_ms == 1
+
+    frame.dither = 0.5
+    assert frame.dither == 0.5
+
+    frame.preemph_coeff = 0.25
+    assert frame.preemph_coeff == 0.25
+
+    frame.remove_dc_offset = False
+    assert frame.remove_dc_offset is False
+
+    frame.window_type = "hanning"
+    assert frame.window_type == "hanning"
+
+    frame.round_to_power_of_two = False
+    assert frame.round_to_power_of_two is False
+
+    frame.blackman_coeff = 0.25
+    assert frame.blackman_coeff == 0.25
+
+    frame.snip_edges = False
+    assert frame.snip_edges is False
+
+
+def test_from_empty_dict():
+    """from_dict({}) yields options whose str() equals that of default-constructed ones."""
+    from_empty = knf.FrameExtractionOptions.from_dict({})
+    default = knf.FrameExtractionOptions()
+    assert str(from_empty) == str(default)
+
+
+def test_from_dict_partial():
+    """from_dict with two fields prints like defaults plus exactly those two assignments."""
+    partial = {"samp_freq": 10, "frame_shift_ms": 2}
+    from_partial = knf.FrameExtractionOptions.from_dict(partial)
+
+    manual = knf.FrameExtractionOptions()
+    assert str(from_partial) != str(manual)
+
+    manual.samp_freq = 10
+    assert str(from_partial) != str(manual)
+
+    manual.frame_shift_ms = 2
+    assert str(from_partial) == str(manual)
+
+    manual.frame_shift_ms = 3
+    assert str(from_partial) != str(manual)
+
+
+def test_from_dict_full_and_as_dict():
+    """as_dict() mirrors the attributes and its output round-trips through from_dict()."""
+    frame = knf.FrameExtractionOptions()
+    frame.samp_freq = 20
+    frame.frame_length_ms = 100
+    as_dict = frame.as_dict()
+    for field, value in as_dict.items():
+        assert value == getattr(frame, field)
+
+    restored = knf.FrameExtractionOptions.from_dict(as_dict)
+    assert str(restored) == str(frame)
+
+    as_dict["window_type"] = "hanning"
+    changed = knf.FrameExtractionOptions.from_dict(as_dict)
+    assert changed.window_type == "hanning"
+
+
+def test_pickle():
+    """A pickled FrameExtractionOptions unpickles to an object with the same str()."""
+    frame = knf.FrameExtractionOptions()
+    frame.samp_freq = 44100
+    frame.dither = 5.5
+
+    restored = pickle.loads(pickle.dumps(frame))
+    assert str(frame) == str(restored)
+
+
+def main():
+    """Run every test in this file, in definition order."""
+    for run_test in (
+        test_default, test_set_get, test_from_empty_dict,
+        test_from_dict_partial, test_from_dict_full_and_as_dict, test_pickle,
+    ):
+        run_test()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py
new file mode 100755
index 0000000..1135c26
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang)
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():  # defaults of a freshly constructed MelBanksOptions
+    mel = knf.MelBanksOptions()
+    assert mel.num_bins == 25
+    assert mel.low_freq == 20
+    assert mel.high_freq == 0
+    assert mel.vtln_low == 100
+    assert mel.vtln_high == -500
+    assert mel.debug_mel is False
+    assert mel.htk_mode is False
+
+
+def test_set_get():
+    """Each MelBanksOptions field can be assigned and then read back."""
+    mel = knf.MelBanksOptions()
+    mel.num_bins = 100
+    assert mel.num_bins == 100
+
+    mel.low_freq = 22
+    assert mel.low_freq == 22
+
+    mel.high_freq = 1
+    assert mel.high_freq == 1
+
+    mel.vtln_low = 101
+    assert mel.vtln_low == 101
+    mel.vtln_high = -100
+    assert mel.vtln_high == -100
+
+    mel.debug_mel = True
+    assert mel.debug_mel is True
+
+    mel.htk_mode = True
+    assert mel.htk_mode is True
+
+
+def test_from_empty_dict():
+    """from_dict({}) yields options whose str() equals that of default-constructed ones."""
+    from_empty = knf.MelBanksOptions.from_dict({})
+    default = knf.MelBanksOptions()
+    assert str(from_empty) == str(default)
+
+
+def test_from_dict_partial():
+    """from_dict with two fields prints like defaults plus exactly those two assignments."""
+    partial = {"num_bins": 10, "debug_mel": True}
+    from_partial = knf.MelBanksOptions.from_dict(partial)
+
+    manual = knf.MelBanksOptions()
+    assert str(from_partial) != str(manual)
+
+    manual.num_bins = 10
+    assert str(from_partial) != str(manual)
+
+    manual.debug_mel = True
+    assert str(from_partial) == str(manual)
+
+    manual.debug_mel = False
+    assert str(from_partial) != str(manual)
+
+
+def test_from_dict_full_and_as_dict():
+    """as_dict() mirrors the attributes and its output round-trips through from_dict()."""
+    mel = knf.MelBanksOptions()
+    mel.num_bins = 80
+    mel.vtln_high = 2
+    as_dict = mel.as_dict()
+    for field, value in as_dict.items():
+        assert value == getattr(mel, field)
+
+    restored = knf.MelBanksOptions.from_dict(as_dict)
+    assert str(restored) == str(mel)
+
+    as_dict["htk_mode"] = True
+    changed = knf.MelBanksOptions.from_dict(as_dict)
+    assert changed.htk_mode is True
+
+
+def test_pickle():
+    """A pickled MelBanksOptions unpickles to an object with the same str()."""
+    mel = knf.MelBanksOptions()
+    mel.num_bins = 100
+    mel.low_freq = 22
+
+    restored = pickle.loads(pickle.dumps(mel))
+    assert str(mel) == str(restored)
+
+
+def main():
+    """Run every test in this file, in definition order."""
+    for run_test in (
+        test_default, test_set_get, test_from_empty_dict,
+        test_from_dict_partial, test_from_dict_full_and_as_dict, test_pickle,
+    ):
+        run_test()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py
new file mode 100755
index 0000000..12f2c66
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+import sys
+
+try:
+    import kaldifeat
+except ImportError:  # kaldifeat is optional; any other error should surface
+    print("Please install kaldifeat first")
+    sys.exit(0)
+
+import kaldi_native_fbank as knf
+import torch
+
+
+def main():
+    """Check that knf.OnlineFbank reproduces kaldifeat.OnlineFbank frames (atol=1e-3)."""
+    sampling_rate = 16000
+    samples = torch.randn(16000 * 10)
+
+    # reference extractor (kaldifeat)
+    ref_opts = kaldifeat.FbankOptions()
+    ref_opts.frame_opts.dither = 0
+    ref_opts.mel_opts.num_bins = 80
+    ref_opts.frame_opts.snip_edges = False
+    ref_opts.mel_opts.debug_mel = False
+    reference = kaldifeat.OnlineFbank(ref_opts)
+    reference.accept_waveform(sampling_rate, samples)
+
+    # extractor under test, configured identically and fed the same samples
+    knf_opts = knf.FbankOptions()
+    knf_opts.frame_opts.dither = 0
+    knf_opts.mel_opts.num_bins = 80
+    knf_opts.frame_opts.snip_edges = False
+    knf_opts.mel_opts.debug_mel = False
+    candidate = knf.OnlineFbank(knf_opts)
+    candidate.accept_waveform(sampling_rate, samples.tolist())
+
+    assert reference.num_frames_ready == candidate.num_frames_ready
+    for i in range(candidate.num_frames_ready):
+        expected = reference.get_frame(i)
+        actual = torch.from_numpy(candidate.get_frame(i))
+        assert torch.allclose(expected, actual, atol=1e-3), (i, (expected - actual).abs().max())
+
+
+if __name__ == "__main__":
+ torch.manual_seed(20220825)
+ main()
+ print("success")
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh
new file mode 100755
index 0000000..2fc150e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Usage:
+#
+# (1) To check files of the last commit
+# ./scripts/check_style_cpplint.sh
+#
+# (2) To check changed files not committed yet
+# ./scripts/check_style_cpplint.sh 1
+#
+# (3) To check all files in the project
+# ./scripts/check_style_cpplint.sh 2
+
+
+cpplint_version="1.5.4"
+cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+kaldi_native_fbank_dir=$(cd "$cur_dir/.." && pwd)
+# cpplint is unpacked into (and reused from) the project's build directory
+build_dir=$kaldi_native_fbank_dir/build
+mkdir -p "$build_dir"
+
+cpplint_src=$build_dir/cpplint-${cpplint_version}/cpplint.py
+
+if [ ! -d "$build_dir/cpplint-${cpplint_version}" ]; then
+ pushd $build_dir
+ if command -v wget &> /dev/null; then
+ wget https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
+ elif command -v curl &> /dev/null; then
+ curl -O -SL https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
+ else
+ echo "Please install wget or curl to download cpplint"
+ exit 1
+ fi
+ tar xf ${cpplint_version}.tar.gz
+ rm ${cpplint_version}.tar.gz
+
+ # cpplint will report the following error for: __host__ __device__ (
+ #
+ # Extra space before ( in function call [whitespace/parens] [4]
+ #
+ # the following patch disables the above error
+ sed -i "3490i\ not Search(r'__host__ __device__\\\s+\\\(', fncall) and" $cpplint_src
+ popd
+fi
+
+source $kaldi_native_fbank_dir/scripts/utils.sh
+
+# return true if the given file is a c++ source file
+# return false otherwise
+function is_source_code_file() {
+  local path=$1
+  if [[ $path == *.cc || $path == *.h || $path == *.cu ]]; then
+    echo true
+  else
+    echo false
+  fi
+}
+
+function check_style() {  # lint file $1 with cpplint; abort the script if it fails
+  python3 "$cpplint_src" "$1" || abort "$1"
+}
+
+function check_last_commit() {
+  # Print the files touched by the last commit (diff against HEAD^1) on one line.
+  echo $(git diff HEAD^1 --name-only --diff-filter=ACDMRUXB)
+}
+
+function check_current_dir() {
+  # Print every tracked file with uncommitted changes, all on one line.
+  # A status line with four fields is treated as a rename and contributes
+  # its last field; any other line contributes its second field.
+  echo $(git status -s -uno --porcelain | awk '{
+    if (NF == 4) {
+      print $NF
+    } else {
+      print $2
+    }}')
+}
+
+function do_check() {
+  # $1 picks the file set: 1 = uncommitted changes, 2 = every source file in
+  # the project, anything else (or nothing) = files of the last commit.
+  if [[ "$1" == 1 ]]; then
+    echo "Check changed files"
+    files=$(check_current_dir)
+  elif [[ "$1" == 2 ]]; then
+    echo "Check all files"
+    files=$(find $kaldi_native_fbank_dir/kaldi-native-fbank -name "*.h" -o -name "*.cc" -o -name "*.cu")
+  else
+    echo "Check last commit"
+    files=$(check_last_commit)
+  fi
+
+  # Lint only paths that look like C++/CUDA sources and still exist on disk.
+  # $files is deliberately unquoted: each whitespace-separated word is one path.
+  for f in $files; do
+    need_check=$(is_source_code_file $f)
+    if $need_check; then
+      [[ -f $f ]] && check_style $f
+    fi
+  done
+}
+
+function main() {
+  # $1 (optional) is the mode selector handed to do_check; see "Usage" above.
+  do_check $1
+  ok "Great! Style check passed!"
+}
+
+cd $kaldi_native_fbank_dir
+
+main $1
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh
new file mode 100644
index 0000000..fb424a7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+default='\033[0m'
+bold='\033[1m'
+red='\033[31m'
+green='\033[32m'
+
+function ok() {  # print "[OK] $1"; $1 is data, never part of the format string
+  printf "${bold}${green}[OK]${default} %s\n" "$1"
+}
+
+function error() {  # print "[FAILED] $1"; $1 is data, never part of the format string
+  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
+}
+
+function abort() {  # print "[FAILED] $1" and terminate the script with status 1
+  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
+  exit 1
+}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py
new file mode 100644
index 0000000..de3d8a2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang)
+
+import re
+
+import setuptools
+
+from cmake.cmake_extension import BuildExtension, bdist_wheel, cmake_extension
+
+
+def read_long_description():
+    """Return the text of README.md (read as UTF-8) for use as the long description."""
+    with open("README.md", encoding="utf8") as readme_file:
+        return readme_file.read()
+
+
+def get_package_version():
+    """Return the version set via KALDI_NATIVE_FBANK_VERSION in ./CMakeLists.txt."""
+    with open("CMakeLists.txt") as f:
+        match = re.search(r"set\(KALDI_NATIVE_FBANK_VERSION (.*)\)", f.read())
+    if match is None:
+        raise RuntimeError("KALDI_NATIVE_FBANK_VERSION not found in CMakeLists.txt")
+    return match.group(1).strip('"')
+
+
+package_name = "kaldi-native-fbank"
+init_py = "kaldi-native-fbank/python/kaldi_native_fbank/__init__.py"
+
+# Temporarily append __version__ so that the packaged __init__.py exposes it.
+with open(init_py, "a") as f:
+    f.write(f"__version__ = '{get_package_version()}'\n")
+
+try:
+    setuptools.setup(
+        name=package_name,
+        version=get_package_version(),
+        author="Fangjun Kuang",
+        author_email="csukuangfj@gmail.com",
+        package_dir={"kaldi_native_fbank": "kaldi-native-fbank/python/kaldi_native_fbank"},
+        packages=["kaldi_native_fbank"],
+        url="https://github.com/csukuangfj/kaldi-native-fbank",
+        long_description=read_long_description(),
+        long_description_content_type="text/markdown",
+        ext_modules=[cmake_extension("_kaldi_native_fbank")],
+        cmdclass={"build_ext": BuildExtension, "bdist_wheel": bdist_wheel},
+        zip_safe=False,
+        classifiers=[
+            "Programming Language :: C++",
+            "Programming Language :: Python",
+            "Programming Language :: Python :: 3",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        ],
+        python_requires=">=3.6.0",
+        license="Apache licensed, as found in the LICENSE file",
+    )
+finally:
+    # Restore __init__.py even when setup() fails, so the source tree stays clean.
+    with open(init_py, "r") as f:
+        lines = f.readlines()
+
+    with open(init_py, "w") as f:
+        # skip __version__ = "x.x.x"
+        f.writelines(line for line in lines if "__version__" not in line)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md
new file mode 100644
index 0000000..41924c0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md
@@ -0,0 +1,11 @@
+# gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
+
+Go to <https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads/8-3-2019-03> to download the toolchain.
+
+```bash
+mkdir /ceph-fj/fangjun/software
+cd /ceph-fj/fangjun/software
+tar xvf /path/to/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
+
+export PATH=/ceph-fj/fangjun/software/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf/bin:$PATH
+```
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake
new file mode 100644
index 0000000..abe1a22
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake
@@ -0,0 +1,17 @@
+# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/arm-linux-gnueabihf.toolchain.cmake
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR arm)
+# The cross compilers are referenced by bare name, so they must be found on PATH.
+set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
+set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
+# Programs are never looked up under CMAKE_FIND_ROOT_PATH; libraries/headers only there.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+# NOTE(review): these assignments replace any flags set earlier — confirm that is intended.
+set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
+set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
+
+# cache flags
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
diff --git a/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
index 37d1624..491f41a 100644
--- a/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
@@ -38,9 +38,9 @@
### Project options
###
## Project stuff
-option(YAML_CPP_BUILD_TESTS "Enable testing" ON)
-option(YAML_CPP_BUILD_TOOLS "Enable parse tools" ON)
-option(YAML_CPP_BUILD_CONTRIB "Enable contrib stuff in library" ON)
+option(YAML_CPP_BUILD_TESTS "Enable testing" OFF)
+option(YAML_CPP_BUILD_TOOLS "Enable parse tools" OFF)
+option(YAML_CPP_BUILD_CONTRIB "Enable contrib stuff in library" OFF)
## Build options
# --> General
--
Gitblit v1.9.1