From 7ab2e5cf22bbb31808bcacf84c054c710e4e6a93 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Mon, 24 Apr 2023 16:19:17 +0800
Subject: [PATCH] Merge pull request #400 from alibaba-damo-academy/dev_knf

---
 funasr/runtime/onnxruntime/src/ct-transformer.cpp                                                                          |  188 
 funasr/runtime/onnxruntime/readme.md                                                                                       |   10 
 funasr/runtime/onnxruntime/src/model.cpp                                                                                   |    8 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt                    |   28 
 funasr/runtime/onnxruntime/src/online-feature.h                                                                            |   51 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc                                   |  142 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc                   |   59 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake                                 |  916 ++++
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE                                                          |  211 +
 funasr/runtime/onnxruntime/include/libfunasrapi.h                                                                          |   36 
 funasr/runtime/onnxruntime/src/tokenizer.h                                                                                 |   27 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh                                                 |   19 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc                 |   67 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h                         |  142 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc             |   37 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h                  |   30 
 funasr/runtime/onnxruntime/src/precomp.h                                                                                   |   36 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml                               |   70 
 funasr/runtime/onnxruntime/src/audio.cpp                                                                                   |  150 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc                  |   57 
 funasr/runtime/onnxruntime/src/e2e-vad.h                                                                                   |  797 +++
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt                   |   31 
 funasr/runtime/onnxruntime/src/fsmn-vad.h                                                                                  |   60 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py                                                         |   64 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py |  119 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h              |   27 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc               |   58 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py             |   48 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md                                          |    5 
 funasr/runtime/onnxruntime/include/model.h                                                                                 |   19 
 funasr/runtime/onnxruntime/src/ct-transformer.h                                                                            |   26 
 funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h                                                 |  134 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc                             |   52 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake                   |   17 
 funasr/runtime/onnxruntime/include/audio.h                                                                                 |   62 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h                                    |  383 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in                                                      |    5 
 funasr/runtime/onnxruntime/CMakeLists.txt                                                                                  |    6 
 funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp                                                                 |  140 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc                     |   49 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc                      |  257 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h                  |   30 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc                              |   73 
 funasr/runtime/onnxruntime/src/paraformer.h                                                                                |   58 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt                                                   |  115 
 funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp                                                                     |   38 
 funasr/runtime/onnxruntime/src/paraformer.cpp                                                                              |  262 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc                                  |   67 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc                     |   48 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in                   |   21 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h                         |  178 
 funasr/runtime/onnxruntime/src/alignedmem.cpp                                                                              |    4 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c                                  | 2968 ++++++++++++++
 funasr/runtime/onnxruntime/src/CMakeLists.txt                                                                              |   46 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md                                             |   11 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt                           |    8 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py            |  198 
 funasr/runtime/onnxruntime/src/fsmn-vad.cpp                                                                                |  273 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md                                                        |  106 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc                        |  247 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h                                   |   56 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt                         |    2 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py         |  107 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h                |   30 
 funasr/runtime/onnxruntime/src/tensor.h                                                                                    |    2 
 funasr/runtime/onnxruntime/src/predefine-coe.h                                                                             |    0 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h                       |  117 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format                                                    |    9 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh                                   |  126 
 funasr/runtime/onnxruntime/include/com-define.h                                                                            |   48 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py                                         |  120 
 funasr/runtime/onnxruntime/src/util.h                                                                                      |   26 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc                 |   68 
 funasr/runtime/onnxruntime/src/common-struct.h                                                                             |    0 
 funasr/runtime/onnxruntime/src/alignedmem.h                                                                                |    6 
 funasr/runtime/onnxruntime/src/vocab.h                                                                                     |   25 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake                                           |   57 
 funasr/runtime/onnxruntime/src/tokenizer.cpp                                                                               |  208 +
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc                         |  120 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h                          |  134 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc                          |  136 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake                                             |   35 
 funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h                                                |  142 
 funasr/runtime/onnxruntime/src/commonfunc.h                                                                                |   24 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc                        |  165 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py         |    6 
 funasr/runtime/onnxruntime/src/libfunasrapi.cpp                                                                            |  192 
 funasr/runtime/onnxruntime/src/online-feature.cpp                                                                          |  129 
 funasr/runtime/onnxruntime/src/util.cpp                                                                                    |   28 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h                      |   38 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml                               |   67 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h                           |   52 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak                       |   93 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h                   |   30 
 funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt                                                             |    6 
 /dev/null                                                                                                                  |   99 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt                                |    8 
 funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml                               |   97 
 funasr/runtime/onnxruntime/src/vocab.cpp                                                                                   |   40 
 99 files changed, 11,571 insertions(+), 471 deletions(-)

diff --git a/funasr/runtime/onnxruntime/CMakeLists.txt b/funasr/runtime/onnxruntime/CMakeLists.txt
index 6feef92..9879c4a 100644
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@@ -25,6 +25,8 @@
     link_directories(${ONNXRUNTIME_DIR}/lib)
 endif()
 
-add_subdirectory("./third_party/yaml-cpp")
+include_directories(${PROJECT_SOURCE_DIR}/third_party/kaldi-native-fbank)
+
+add_subdirectory(third_party/yaml-cpp)
+add_subdirectory(third_party/kaldi-native-fbank/kaldi-native-fbank/csrc)
 add_subdirectory(src)
-add_subdirectory(tester)
diff --git a/funasr/runtime/onnxruntime/include/Audio.h b/funasr/runtime/onnxruntime/include/Audio.h
deleted file mode 100644
index ec49a9f..0000000
--- a/funasr/runtime/onnxruntime/include/Audio.h
+++ /dev/null
@@ -1,66 +0,0 @@
-
-#ifndef AUDIO_H
-#define AUDIO_H
-
-#include <ComDefine.h>
-#include <queue>
-#include <stdint.h>
-
-#ifndef model_sample_rate
-#define model_sample_rate 16000
-#endif
-#ifndef WAV_HEADER_SIZE
-#define WAV_HEADER_SIZE 44
-#endif
-
-using namespace std;
-
-class AudioFrame {
-  private:
-    int start;
-    int end;
-    int len;
-
-  public:
-    AudioFrame();
-    AudioFrame(int len);
-
-    ~AudioFrame();
-    int set_start(int val);
-    int set_end(int val, int max_len);
-    int get_start();
-    int get_len();
-    int disp();
-};
-
-class Audio {
-  private:
-    float *speech_data;
-    int16_t *speech_buff;
-    int speech_len;
-    int speech_align_len;
-    int offset;
-    float align_size;
-    int data_type;
-    queue<AudioFrame *> frame_queue;
-
-  public:
-    Audio(int data_type);
-    Audio(int data_type, int size);
-    ~Audio();
-    void disp();
-    bool loadwav(const char* filename, int32_t* sampling_rate);
-    void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
-    bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
-    bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
-    bool loadpcmwav(const char* filename, int32_t* sampling_rate);
-    int fetch_chunck(float *&dout, int len);
-    int fetch(float *&dout, int &len, int &flag);
-    void padding();
-    void split();
-    float get_time_len();
-
-    int get_queue_size() { return (int)frame_queue.size(); }
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/include/ComDefine.h b/funasr/runtime/onnxruntime/include/ComDefine.h
deleted file mode 100644
index f131e5e..0000000
--- a/funasr/runtime/onnxruntime/include/ComDefine.h
+++ /dev/null
@@ -1,11 +0,0 @@
-
-#ifndef COMDEFINE_H
-#define COMDEFINE_H
-
-#define S_BEGIN  0
-#define S_MIDDLE 1
-#define S_END    2
-#define S_ALL    3
-#define S_ERR    4
-
-#endif
diff --git a/funasr/runtime/onnxruntime/include/Model.h b/funasr/runtime/onnxruntime/include/Model.h
deleted file mode 100644
index 6f45c38..0000000
--- a/funasr/runtime/onnxruntime/include/Model.h
+++ /dev/null
@@ -1,17 +0,0 @@
-
-#ifndef MODEL_H
-#define MODEL_H
-
-#include <string>
-
-class Model {
-  public:
-    virtual ~Model(){};
-    virtual void reset() = 0;
-    virtual std::string forward_chunk(float *din, int len, int flag) = 0;
-    virtual std::string forward(float *din, int len, int flag) = 0;
-    virtual std::string rescoring() = 0;
-};
-
-Model *create_model(const char *path,int nThread=0,bool quantize=false);
-#endif
diff --git a/funasr/runtime/onnxruntime/include/audio.h b/funasr/runtime/onnxruntime/include/audio.h
new file mode 100644
index 0000000..ab9f420
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -0,0 +1,62 @@
+
+#ifndef AUDIO_H
+#define AUDIO_H
+
+#include <queue>
+#include <stdint.h>
+#include "model.h"
+
+#ifndef WAV_HEADER_SIZE
+#define WAV_HEADER_SIZE 44
+#endif
+
+using namespace std;
+
+class AudioFrame {
+  private:
+    int start;
+    int end;
+    int len;
+
+  public:
+    AudioFrame();
+    AudioFrame(int len);
+
+    ~AudioFrame();
+    int SetStart(int val);
+    int SetEnd(int val);
+    int GetStart();
+    int GetLen();
+    int Disp();
+};
+
+class Audio {
+  private:
+    float *speech_data;
+    int16_t *speech_buff;
+    int speech_len;
+    int speech_align_len;
+    int offset;
+    float align_size;
+    int data_type;
+    queue<AudioFrame *> frame_queue;
+
+  public:
+    Audio(int data_type);
+    Audio(int data_type, int size);
+    ~Audio();
+    void Disp();
+    bool LoadWav(const char* filename, int32_t* sampling_rate);
+    void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
+    bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
+    bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
+    bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
+    int FetchChunck(float *&dout, int len);
+    int Fetch(float *&dout, int &len, int &flag);
+    void Padding();
+    void Split(Model* recog_obj);
+    float GetTimeLen();
+    int GetQueueSize() { return (int)frame_queue.size(); }
+};
+
+#endif
diff --git a/funasr/runtime/onnxruntime/include/com-define.h b/funasr/runtime/onnxruntime/include/com-define.h
new file mode 100644
index 0000000..e2c22f4
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/com-define.h
@@ -0,0 +1,48 @@
+
+#ifndef COMDEFINE_H
+#define COMDEFINE_H
+
+#define S_BEGIN  0
+#define S_MIDDLE 1
+#define S_END    2
+#define S_ALL    3
+#define S_ERR    4
+
+#ifndef MODEL_SAMPLE_RATE
+#define MODEL_SAMPLE_RATE 16000
+#endif
+
+// vad
+#ifndef VAD_SILENCE_DYRATION
+#define VAD_SILENCE_DYRATION 15000
+#endif
+
+#ifndef VAD_MAX_LEN
+#define VAD_MAX_LEN 800
+#endif
+
+#ifndef VAD_SPEECH_NOISE_THRES
+#define VAD_SPEECH_NOISE_THRES 0.9
+#endif
+
+// punc
+#define PUNC_MODEL_FILE  "punc_model.onnx"
+#define PUNC_YAML_FILE "punc.yaml"
+#define UNK_CHAR "<unk>"
+
+#define  INPUT_NUM  2
+#define  INPUT_NAME1 "input"
+#define  INPUT_NAME2 "text_lengths"
+#define  OUTPUT_NAME "logits"
+#define  TOKEN_LEN     20
+
+#define  CANDIDATE_NUM   6
+#define UNKNOW_INDEX 0
+#define NOTPUNC_INDEX 1
+#define COMMA_INDEX 2
+#define PERIOD_INDEX 3
+#define QUESTION_INDEX 4
+#define DUN_INDEX 5
+#define  CACHE_POP_TRIGGER_LIMIT   200
+
+#endif
diff --git a/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h
new file mode 100644
index 0000000..0786aad
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/feature-fbank.h
@@ -0,0 +1,134 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+struct FbankOptions {
+  FrameExtractionOptions frame_opts;
+  MelBanksOptions mel_opts;
+  // append an extra dimension with energy to the filter banks
+  bool use_energy = false;
+  float energy_floor = 0.0f;  // active iff use_energy==true
+
+  // If true, compute log_energy before preemphasis and windowing
+  // If false, compute log_energy after preemphasis and windowing
+  bool raw_energy = true;  // active iff use_energy==true
+
+  // If true, put energy last (if using energy)
+  // If false, put energy first
+  bool htk_compat = false;  // active iff use_energy==true
+
+  // if true (default), produce log-filterbank, else linear
+  bool use_log_fbank = true;
+
+  // if true (default), use power in filterbank
+  // analysis, else magnitude.
+  bool use_power = true;
+
+  FbankOptions() { mel_opts.num_bins = 23; }
+
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "frame_opts: \n";
+    os << frame_opts << "\n";
+    os << "\n";
+
+    os << "mel_opts: \n";
+    os << mel_opts << "\n";
+
+    os << "use_energy: " << use_energy << "\n";
+    os << "energy_floor: " << energy_floor << "\n";
+    os << "raw_energy: " << raw_energy << "\n";
+    os << "htk_compat: " << htk_compat << "\n";
+    os << "use_log_fbank: " << use_log_fbank << "\n";
+    os << "use_power: " << use_power << "\n";
+    return os.str();
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
+
+class FbankComputer {
+ public:
+  using Options = FbankOptions;
+
+  explicit FbankComputer(const FbankOptions &opts);
+  ~FbankComputer();
+
+  int32_t Dim() const {
+    return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
+  }
+
+  // if true, compute log_energy_pre_window but after dithering and dc removal
+  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
+
+  const FrameExtractionOptions &GetFrameOptions() const {
+    return opts_.frame_opts;
+  }
+
+  const FbankOptions &GetOptions() const { return opts_; }
+
+  /**
+     Function that computes one frame of features from
+     one frame of signal.
+
+     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
+         prior to windowing and pre-emphasis, or
+         log(numeric_limits<float>::min()), whichever is greater.  Must be
+         ignored by this function if this class returns false from
+         this->NeedRawLogEnergy().
+     @param [in] vtln_warp  The VTLN warping factor that the user wants
+         to be applied when computing features for this utterance.  Will
+         normally be 1.0, meaning no warping is to be done.  The value will
+         be ignored for feature types that don't support VTLN, such as
+         spectrogram features.
+     @param [in] signal_frame  One frame of the signal,
+       as extracted using the function ExtractWindow() using the options
+       returned by this->GetFrameOptions().  The function will use the
+       vector as a workspace, which is why it's a non-const pointer.
+     @param [out] feature  Pointer to a vector of size this->Dim(), to which
+         the computed feature will be written. It should be pre-allocated.
+  */
+  void Compute(float signal_raw_log_energy, float vtln_warp,
+               std::vector<float> *signal_frame, float *feature);
+
+ private:
+  const MelBanks *GetMelBanks(float vtln_warp);
+
+  FbankOptions opts_;
+  float log_energy_floor_;
+  std::map<float, MelBanks *> mel_banks_;  // float is VTLN coefficient.
+  Rfft rfft_;
+};
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h
new file mode 100644
index 0000000..5ca5511
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/kaldi-native-fbank/csrc/online-feature.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+
+#include <cstdint>
+#include <deque>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+namespace knf {
+
+/// This class serves as a storage for feature vectors with an option to limit
+/// the memory usage by removing old elements. The deleted frames indices are
+/// "remembered" so that regardless of the MAX_ITEMS setting, the user always
+/// provides the indices as if no deletion was being performed.
+/// This is useful when processing very long recordings which would otherwise
+/// cause the memory to eventually blow up when the features are not being
+/// removed.
+class RecyclingVector {
+ public:
+  /// By default it does not remove any elements.
+  explicit RecyclingVector(int32_t items_to_hold = -1);
+
+  ~RecyclingVector() = default;
+  RecyclingVector(const RecyclingVector &) = delete;
+  RecyclingVector &operator=(const RecyclingVector &) = delete;
+
+  // The pointer is owned by RecyclingVector
+  // Users should not free it
+  const float *At(int32_t index) const;
+
+  void PushBack(std::vector<float> item);
+
+  /// This method returns the size as if no "recycling" had happened,
+  /// i.e. equivalent to the number of times the PushBack method has been
+  /// called.
+  int32_t Size() const;
+
+ private:
+  std::deque<std::vector<float>> items_;
+  int32_t items_to_hold_;
+  int32_t first_available_index_;
+};
+
+/// This is a templated class for online feature extraction;
+/// it's templated on a class like MfccComputer or PlpComputer
+/// that does the basic feature extraction.
+template <class C>
+class OnlineGenericBaseFeature {
+ public:
+  // Constructor from options class
+  explicit OnlineGenericBaseFeature(const typename C::Options &opts);
+
+  int32_t Dim() const { return computer_.Dim(); }
+
+  float FrameShiftInSeconds() const {
+    return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
+  }
+
+  int32_t NumFramesReady() const { return features_.Size(); }
+
+  // Note: IsLastFrame() will only ever return true if you have called
+  // InputFinished() (and this frame is the last frame).
+  bool IsLastFrame(int32_t frame) const {
+    return input_finished_ && frame == NumFramesReady() - 1;
+  }
+
+  const float *GetFrame(int32_t frame) const { return features_.At(frame); }
+
+  // This would be called from the application, when you get
+  // more wave data.  Note: the sampling_rate is only provided so
+  // the code can assert that it matches the sampling rate
+  // expected in the options.
+  //
+  // @param sampling_rate The sampling_rate of the input waveform
+  // @param waveform Pointer to a 1-D array of size n
+  // @param n Number of entries in waveform
+  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+
+  // InputFinished() tells the class you won't be providing any
+  // more waveform.  This will help flush out the last frame or two
+  // of features, in the case where snip-edges == false; it also
+  // affects the return value of IsLastFrame().
+  void InputFinished();
+
+ private:
+  // This function computes any additional feature frames that it is possible to
+  // compute from 'waveform_remainder_', which at this point may contain more
+  // than just a remainder-sized quantity (because AcceptWaveform() appends to
+  // waveform_remainder_ before calling this function).  It adds these feature
+  // frames to features_, and shifts off any now-unneeded samples of input from
+  // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
+  void ComputeFeatures();
+
+  C computer_;  // class that does the MFCC or PLP or filterbank computation
+
+  FeatureWindowFunction window_function_;
+
+  // features_ is the Mfcc or Plp or Fbank features that we have already
+  // computed.
+
+  RecyclingVector features_;
+
+  // True if the user has called "InputFinished()"
+  bool input_finished_;
+
+  // waveform_offset_ is the number of samples of waveform that we have
+  // already discarded, i.e. that were prior to 'waveform_remainder_'.
+  int64_t waveform_offset_;
+
+  // waveform_remainder_ is a short piece of waveform that we may need to keep
+  // after extracting all the whole frames we can (whatever length of feature
+  // will be required for the next phase of computation).
+  // It is a 1-D tensor
+  std::vector<float> waveform_remainder_;
+};
+
+using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/include/libfunasrapi.h b/funasr/runtime/onnxruntime/include/libfunasrapi.h
index 9bc37e7..6b6e148 100644
--- a/funasr/runtime/onnxruntime/include/libfunasrapi.h
+++ b/funasr/runtime/onnxruntime/include/libfunasrapi.h
@@ -35,7 +35,6 @@
  RASRM_CTC_GREEDY_SEARCH=0,
  RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
  RASRM_ATTENSION_RESCORING = 2,
- 
 }FUNASR_MODE;
 
 typedef enum {
@@ -43,33 +42,24 @@
 	FUNASR_MODEL_PADDLE_2 = 1,
 	FUNASR_MODEL_K2 = 2,
 	FUNASR_MODEL_PARAFORMER = 3,
-
 }FUNASR_MODEL_TYPE;
 
-typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
+typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step.
 	
-// APIs for qmasr
-_FUNASRAPI FUNASR_HANDLE  FunASRInit(const char* szModelDir, int nThread, bool quantize);
+// APIs for funasr
+_FUNASRAPI FUNASR_HANDLE  FunASRInit(const char* sz_model_dir, int thread_num, bool quantize=false, bool use_vad=false, bool use_punc=false);
 
+// If no fn_callback is provided, pass NULL.
+_FUNASRAPI FUNASR_RESULT	FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
+_FUNASRAPI FUNASR_RESULT	FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad=false, bool use_punc=false);
 
-// if not give a fnCallback ,it should be NULL 
-_FUNASRAPI FUNASR_RESULT	FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI FUNASR_RESULT	FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
-
-_FUNASRAPI const char*	FunASRGetResult(FUNASR_RESULT Result,int nIndex);
-
-_FUNASRAPI const int		FunASRGetRetNumber(FUNASR_RESULT Result);
-
-_FUNASRAPI void			FunASRFreeResult(FUNASR_RESULT Result);
-
-_FUNASRAPI void			FunASRUninit(FUNASR_HANDLE Handle);
-
-_FUNASRAPI const float	FunASRGetRetSnippetTime(FUNASR_RESULT Result);
+_FUNASRAPI const char*	FunASRGetResult(FUNASR_RESULT result,int n_index);
+_FUNASRAPI const int	FunASRGetRetNumber(FUNASR_RESULT result);
+_FUNASRAPI void			FunASRFreeResult(FUNASR_RESULT result);
+_FUNASRAPI void			FunASRUninit(FUNASR_HANDLE handle);
+_FUNASRAPI const float	FunASRGetRetSnippetTime(FUNASR_RESULT result);
 
 #ifdef __cplusplus 
 
diff --git a/funasr/runtime/onnxruntime/include/model.h b/funasr/runtime/onnxruntime/include/model.h
new file mode 100644
index 0000000..26a67f0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/model.h
@@ -0,0 +1,24 @@
+
+#ifndef MODEL_H
+#define MODEL_H
+
+#include <string>
+#include <vector>
+
+// Abstract model interface: every operation below is pure virtual and must be
+// provided by a concrete implementation.
+class Model {
+  public:
+    virtual ~Model() {}
+    virtual void Reset() = 0;
+    virtual std::string ForwardChunk(float *din, int len, int flag) = 0;
+    virtual std::string Forward(float *din, int len, int flag) = 0;
+    virtual std::string Rescoring() = 0;
+    virtual std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data)=0;
+    virtual std::string AddPunc(const char* sz_input)=0;
+};
+
+// Factory declared here and defined elsewhere; returns a pointer to a Model.
+// NOTE(review): ownership of the returned pointer is not visible here - confirm.
+Model *CreateModel(const char *path,int thread_num=1,bool quantize=false, bool use_vad=false, bool use_punc=false);
+#endif
diff --git a/funasr/runtime/onnxruntime/readme.md b/funasr/runtime/onnxruntime/readme.md
index f7be2e0..6886d58 100644
--- a/funasr/runtime/onnxruntime/readme.md
+++ b/funasr/runtime/onnxruntime/readme.md
@@ -25,12 +25,6 @@
 tar -zxvf onnxruntime-linux-x64-1.14.0.tgz
 ```
 
-### Install fftw3
-```shell
-sudo apt install libfftw3-dev #ubuntu
-# sudo yum install fftw fftw-devel #centos
-```
-
 ### Install openblas
 ```shell
 sudo apt-get install libopenblas-dev #ubuntu
@@ -65,12 +59,12 @@
 ## Run the demo
 
 ```shell
-tester /path/models_dir /path/wave_file quantize(true or false)
+funasr-onnx-offline /path/models_dir /path/wave_file quantize(true or false) use_vad(true or false) use_punc(true or false)
 ```
 
 The structure of /path/models_dir
 ```
-config.yaml, am.mvn, model.onnx(or model_quant.onnx)
+config.yaml, am.mvn, model.onnx(or model_quant.onnx), (vad_model.onnx, vad.mvn if you use vad), (punc_model.onnx, punc.yaml if you use punc)
 ```
 
 
diff --git a/funasr/runtime/onnxruntime/src/CMakeLists.txt b/funasr/runtime/onnxruntime/src/CMakeLists.txt
index d41fcd0..e00edc1 100644
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -1,44 +1,32 @@
 
 file(GLOB files1 "*.cpp")
 file(GLOB files2 "*.cc")
-file(GLOB files4 "paraformer/*.cpp")
 
-set(files ${files1} ${files2} ${files3} ${files4})
-
-# message("${files}")
+set(files ${files1} ${files2})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 
 add_library(funasr ${files})
 
 if(WIN32)
-
-        set(EXTRA_LIBS libfftw3f-3 yaml-cpp)
-        if(CMAKE_CL_64)
-            target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
-        else()
-            target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
-        endif()
-        target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
-        
-        target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
+    set(EXTRA_LIBS pthread yaml-cpp csrc)
+    if(CMAKE_CL_64)
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
+    else()
+        target_link_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
+    endif()
+    target_include_directories(funasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
+    
+    target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT)
 else()
-
-    set(EXTRA_LIBS fftw3f pthread yaml-cpp)
-    target_include_directories(funasr PUBLIC "/usr/local/opt/fftw/include")
-    target_link_directories(funasr PUBLIC "/usr/local/opt/fftw/lib")
-
-    target_include_directories(funasr PUBLIC "/usr/local/opt/openblas/include")
-    target_link_directories(funasr PUBLIC "/usr/local/opt/openblas/lib")
-
-    target_include_directories(funasr PUBLIC "/usr/include")
-    target_link_directories(funasr PUBLIC "/usr/lib64")
-
-    target_include_directories(funasr PUBLIC  ${FFTW3F_INCLUDE_DIR})
-    target_link_directories(funasr PUBLIC ${FFTW3F_LIBRARY_DIR})
-    include_directories(${ONNXRUNTIME_DIR}/include)    
+    set(EXTRA_LIBS pthread yaml-cpp csrc)
+    include_directories(${ONNXRUNTIME_DIR}/include)
 endif()
 
 include_directories(${CMAKE_SOURCE_DIR}/include)
 target_link_libraries(funasr PUBLIC onnxruntime ${EXTRA_LIBS})
 
-
+add_executable(funasr-onnx-offline "funasr-onnx-offline.cpp")
+add_executable(funasr-onnx-offline-rtf "funasr-onnx-offline-rtf.cpp")
+target_link_libraries(funasr-onnx-offline PUBLIC funasr)
+target_link_libraries(funasr-onnx-offline-rtf PUBLIC funasr)
 
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp b/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
deleted file mode 100644
index 6d2826a..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.cpp
+++ /dev/null
@@ -1,396 +0,0 @@
-
-#include "precomp.h"
-
-using namespace std;
-
-FeatureExtract::FeatureExtract(int mode) : mode(mode)
-{
-}
-
-FeatureExtract::~FeatureExtract()
-{
-}
-
-void FeatureExtract::reset()
-{
-    speech.reset();
-    fqueue.reset();
-}
-
-int FeatureExtract::size()
-{
-    return fqueue.size();
-}
-
-void FeatureExtract::insert(fftwf_plan plan, float *din, int len, int flag)
-{
-    float* fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
-    fftwf_complex* fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
-    memset(fft_input, 0, sizeof(float) * fft_size);
-
-    const float *window = (const float *)&window_hex;
-    if (mode == 3)
-        window = (const float *)&window_hamm_hex;
-
-    speech.load(din, len);
-    int i, j;
-    float tmp_feature[80];
-    if (mode == 0 || mode == 2 || mode == 3) {
-        int ll = (speech.size() - window_size) / window_shift + 1;
-        fqueue.reinit(ll);
-    }
-
-    for (i = 0; i <= speech.size() - window_size; i = i + window_shift) {
-        float tmp_mean = 0;
-        for (j = 0; j < window_size; j++) {
-            tmp_mean += speech[i + j];
-        }
-
-        tmp_mean = tmp_mean / window_size;
-
-        float pre_val = (float)speech[i] - tmp_mean;
-
-        for (j = 0; j < window_size; j++) {
-            float win = window[j];
-            float cur_val = (float)speech[i + j] - tmp_mean;
-            fft_input[j] = win * (cur_val - 0.97 * pre_val);
-            pre_val = cur_val;
-        }
-
-        fftwf_execute_dft_r2c(plan, fft_input, fft_out);
-
-        melspect((float *)fft_out, tmp_feature);
-        int tmp_flag = S_MIDDLE;
-        if (flag == S_END && i > speech.size() - 560)
-            tmp_flag = S_END;
-
-        fqueue.push(tmp_feature, tmp_flag);
-    }
-    speech.update(i);
-    fftwf_free(fft_input);
-    fftwf_free(fft_out);
-}
-
-bool FeatureExtract::fetch(Tensor<float> *&dout)
-{
-    if (fqueue.size() < 1) {
-        return false;
-    } else {
-        dout = fqueue.pop();
-        return true;
-    }
-}
-
-void FeatureExtract::global_cmvn(float *din)
-{
-    const float *std;
-    const float *mean;
-
-    if (mode < 2) {
-        if (mode == 0) {
-            std = (const float *)global_cmvn_std_hex;
-            mean = (const float *)global_cmvn_mean_hex;
-        } else {
-            std = (const float *)global_cmvn_std_online_hex;
-            mean = (const float *)global_cmvn_mean_online_hex;
-        }
-
-        int i;
-        for (i = 0; i < 80; i++) {
-            float tmp = din[i] < 1e-7 ? 1e-7 : din[i];
-            tmp = log(tmp);
-            din[i] = (tmp - mean[i]) / std[i];
-        }
-    } else {
-        int i;
-
-        int val = 0x34000000;
-        float min_resol = *((float *)&val);
-
-        for (i = 0; i < 80; i++) {
-            float tmp = din[i] < min_resol ? min_resol : din[i];
-            din[i] = log(tmp);
-        }
-    }
-}
-
-void FeatureExtract::melspect(float *din, float *dout)
-{
-    float fftmag[256];
-    const float *melcoe = (const float *)melcoe_hex;
-    int i;
-    for (i = 0; i < 256; i++) {
-        float real = din[2 * i];
-        float imag = din[2 * i + 1];
-        fftmag[i] = real * real + imag * imag;
-    }
-    dout[0] = melcoe[0] * fftmag[1] + melcoe[1] * fftmag[2];
-    dout[1] = melcoe[2] * fftmag[2];
-    dout[2] = melcoe[3] * fftmag[3];
-    dout[3] = melcoe[4] * fftmag[3] + melcoe[5] * fftmag[4];
-    dout[4] = melcoe[6] * fftmag[4] + melcoe[7] * fftmag[5];
-    dout[5] = melcoe[8] * fftmag[5] + melcoe[9] * fftmag[6];
-    dout[6] = melcoe[10] * fftmag[6] + melcoe[11] * fftmag[7];
-    dout[7] = melcoe[12] * fftmag[7];
-    dout[8] = melcoe[13] * fftmag[8];
-    dout[9] = melcoe[14] * fftmag[8] + melcoe[15] * fftmag[9];
-    dout[10] = melcoe[16] * fftmag[9] + melcoe[17] * fftmag[10];
-    dout[11] = melcoe[18] * fftmag[10] + melcoe[19] * fftmag[11];
-    dout[12] = melcoe[20] * fftmag[11] + melcoe[21] * fftmag[12] +
-               melcoe[22] * fftmag[13];
-    dout[13] = melcoe[23] * fftmag[12] + melcoe[24] * fftmag[13] +
-               melcoe[25] * fftmag[14];
-    dout[14] = melcoe[26] * fftmag[14] + melcoe[27] * fftmag[15];
-    dout[15] = melcoe[28] * fftmag[15] + melcoe[29] * fftmag[16];
-    dout[16] = melcoe[30] * fftmag[16] + melcoe[31] * fftmag[17];
-    dout[17] = melcoe[32] * fftmag[17] + melcoe[33] * fftmag[18];
-    dout[18] = melcoe[34] * fftmag[18] + melcoe[35] * fftmag[19] +
-               melcoe[36] * fftmag[20];
-    dout[19] = melcoe[37] * fftmag[19] + melcoe[38] * fftmag[20] +
-               melcoe[39] * fftmag[21];
-    dout[20] = melcoe[40] * fftmag[21] + melcoe[41] * fftmag[22];
-    dout[21] = melcoe[42] * fftmag[22] + melcoe[43] * fftmag[23] +
-               melcoe[44] * fftmag[24];
-    dout[22] = melcoe[45] * fftmag[23] + melcoe[46] * fftmag[24] +
-               melcoe[47] * fftmag[25];
-    dout[23] = melcoe[48] * fftmag[25] + melcoe[49] * fftmag[26] +
-               melcoe[50] * fftmag[27];
-    dout[24] = melcoe[51] * fftmag[26] + melcoe[52] * fftmag[27] +
-               melcoe[53] * fftmag[28];
-    dout[25] = melcoe[54] * fftmag[28] + melcoe[55] * fftmag[29] +
-               melcoe[56] * fftmag[30];
-    dout[26] = melcoe[57] * fftmag[29] + melcoe[58] * fftmag[30] +
-               melcoe[59] * fftmag[31] + melcoe[60] * fftmag[32];
-    dout[27] = melcoe[61] * fftmag[31] + melcoe[62] * fftmag[32] +
-               melcoe[63] * fftmag[33];
-    dout[28] = melcoe[64] * fftmag[33] + melcoe[65] * fftmag[34] +
-               melcoe[66] * fftmag[35];
-    dout[29] = melcoe[67] * fftmag[34] + melcoe[68] * fftmag[35] +
-               melcoe[69] * fftmag[36] + melcoe[70] * fftmag[37];
-    dout[30] = melcoe[71] * fftmag[36] + melcoe[72] * fftmag[37] +
-               melcoe[73] * fftmag[38] + melcoe[74] * fftmag[39];
-    dout[31] = melcoe[75] * fftmag[38] + melcoe[76] * fftmag[39] +
-               melcoe[77] * fftmag[40] + melcoe[78] * fftmag[41];
-    dout[32] = melcoe[79] * fftmag[40] + melcoe[80] * fftmag[41] +
-               melcoe[81] * fftmag[42] + melcoe[82] * fftmag[43];
-    dout[33] = melcoe[83] * fftmag[42] + melcoe[84] * fftmag[43] +
-               melcoe[85] * fftmag[44] + melcoe[86] * fftmag[45];
-    dout[34] = melcoe[87] * fftmag[44] + melcoe[88] * fftmag[45] +
-               melcoe[89] * fftmag[46] + melcoe[90] * fftmag[47];
-    dout[35] = melcoe[91] * fftmag[46] + melcoe[92] * fftmag[47] +
-               melcoe[93] * fftmag[48] + melcoe[94] * fftmag[49];
-    dout[36] = melcoe[95] * fftmag[48] + melcoe[96] * fftmag[49] +
-               melcoe[97] * fftmag[50] + melcoe[98] * fftmag[51];
-    dout[37] = melcoe[99] * fftmag[50] + melcoe[100] * fftmag[51] +
-               melcoe[101] * fftmag[52] + melcoe[102] * fftmag[53] +
-               melcoe[103] * fftmag[54];
-    dout[38] = melcoe[104] * fftmag[52] + melcoe[105] * fftmag[53] +
-               melcoe[106] * fftmag[54] + melcoe[107] * fftmag[55] +
-               melcoe[108] * fftmag[56];
-    dout[39] = melcoe[109] * fftmag[55] + melcoe[110] * fftmag[56] +
-               melcoe[111] * fftmag[57] + melcoe[112] * fftmag[58];
-    dout[40] = melcoe[113] * fftmag[57] + melcoe[114] * fftmag[58] +
-               melcoe[115] * fftmag[59] + melcoe[116] * fftmag[60] +
-               melcoe[117] * fftmag[61];
-    dout[41] = melcoe[118] * fftmag[59] + melcoe[119] * fftmag[60] +
-               melcoe[120] * fftmag[61] + melcoe[121] * fftmag[62] +
-               melcoe[122] * fftmag[63] + melcoe[123] * fftmag[64];
-    dout[42] = melcoe[124] * fftmag[62] + melcoe[125] * fftmag[63] +
-               melcoe[126] * fftmag[64] + melcoe[127] * fftmag[65] +
-               melcoe[128] * fftmag[66];
-    dout[43] = melcoe[129] * fftmag[65] + melcoe[130] * fftmag[66] +
-               melcoe[131] * fftmag[67] + melcoe[132] * fftmag[68] +
-               melcoe[133] * fftmag[69];
-    dout[44] = melcoe[134] * fftmag[67] + melcoe[135] * fftmag[68] +
-               melcoe[136] * fftmag[69] + melcoe[137] * fftmag[70] +
-               melcoe[138] * fftmag[71] + melcoe[139] * fftmag[72];
-    dout[45] = melcoe[140] * fftmag[70] + melcoe[141] * fftmag[71] +
-               melcoe[142] * fftmag[72] + melcoe[143] * fftmag[73] +
-               melcoe[144] * fftmag[74] + melcoe[145] * fftmag[75];
-    dout[46] = melcoe[146] * fftmag[73] + melcoe[147] * fftmag[74] +
-               melcoe[148] * fftmag[75] + melcoe[149] * fftmag[76] +
-               melcoe[150] * fftmag[77] + melcoe[151] * fftmag[78];
-    dout[47] = melcoe[152] * fftmag[76] + melcoe[153] * fftmag[77] +
-               melcoe[154] * fftmag[78] + melcoe[155] * fftmag[79] +
-               melcoe[156] * fftmag[80] + melcoe[157] * fftmag[81];
-    dout[48] = melcoe[158] * fftmag[79] + melcoe[159] * fftmag[80] +
-               melcoe[160] * fftmag[81] + melcoe[161] * fftmag[82] +
-               melcoe[162] * fftmag[83] + melcoe[163] * fftmag[84];
-    dout[49] = melcoe[164] * fftmag[82] + melcoe[165] * fftmag[83] +
-               melcoe[166] * fftmag[84] + melcoe[167] * fftmag[85] +
-               melcoe[168] * fftmag[86] + melcoe[169] * fftmag[87] +
-               melcoe[170] * fftmag[88];
-    dout[50] = melcoe[171] * fftmag[85] + melcoe[172] * fftmag[86] +
-               melcoe[173] * fftmag[87] + melcoe[174] * fftmag[88] +
-               melcoe[175] * fftmag[89] + melcoe[176] * fftmag[90] +
-               melcoe[177] * fftmag[91];
-    dout[51] = melcoe[178] * fftmag[89] + melcoe[179] * fftmag[90] +
-               melcoe[180] * fftmag[91] + melcoe[181] * fftmag[92] +
-               melcoe[182] * fftmag[93] + melcoe[183] * fftmag[94] +
-               melcoe[184] * fftmag[95];
-    dout[52] = melcoe[185] * fftmag[92] + melcoe[186] * fftmag[93] +
-               melcoe[187] * fftmag[94] + melcoe[188] * fftmag[95] +
-               melcoe[189] * fftmag[96] + melcoe[190] * fftmag[97] +
-               melcoe[191] * fftmag[98];
-    dout[53] = melcoe[192] * fftmag[96] + melcoe[193] * fftmag[97] +
-               melcoe[194] * fftmag[98] + melcoe[195] * fftmag[99] +
-               melcoe[196] * fftmag[100] + melcoe[197] * fftmag[101] +
-               melcoe[198] * fftmag[102];
-    dout[54] = melcoe[199] * fftmag[99] + melcoe[200] * fftmag[100] +
-               melcoe[201] * fftmag[101] + melcoe[202] * fftmag[102] +
-               melcoe[203] * fftmag[103] + melcoe[204] * fftmag[104] +
-               melcoe[205] * fftmag[105] + melcoe[206] * fftmag[106];
-    dout[55] = melcoe[207] * fftmag[103] + melcoe[208] * fftmag[104] +
-               melcoe[209] * fftmag[105] + melcoe[210] * fftmag[106] +
-               melcoe[211] * fftmag[107] + melcoe[212] * fftmag[108] +
-               melcoe[213] * fftmag[109] + melcoe[214] * fftmag[110];
-    dout[56] = melcoe[215] * fftmag[107] + melcoe[216] * fftmag[108] +
-               melcoe[217] * fftmag[109] + melcoe[218] * fftmag[110] +
-               melcoe[219] * fftmag[111] + melcoe[220] * fftmag[112] +
-               melcoe[221] * fftmag[113] + melcoe[222] * fftmag[114];
-    dout[57] = melcoe[223] * fftmag[111] + melcoe[224] * fftmag[112] +
-               melcoe[225] * fftmag[113] + melcoe[226] * fftmag[114] +
-               melcoe[227] * fftmag[115] + melcoe[228] * fftmag[116] +
-               melcoe[229] * fftmag[117] + melcoe[230] * fftmag[118] +
-               melcoe[231] * fftmag[119];
-    dout[58] = melcoe[232] * fftmag[115] + melcoe[233] * fftmag[116] +
-               melcoe[234] * fftmag[117] + melcoe[235] * fftmag[118] +
-               melcoe[236] * fftmag[119] + melcoe[237] * fftmag[120] +
-               melcoe[238] * fftmag[121] + melcoe[239] * fftmag[122] +
-               melcoe[240] * fftmag[123];
-    dout[59] = melcoe[241] * fftmag[120] + melcoe[242] * fftmag[121] +
-               melcoe[243] * fftmag[122] + melcoe[244] * fftmag[123] +
-               melcoe[245] * fftmag[124] + melcoe[246] * fftmag[125] +
-               melcoe[247] * fftmag[126] + melcoe[248] * fftmag[127] +
-               melcoe[249] * fftmag[128];
-    dout[60] = melcoe[250] * fftmag[124] + melcoe[251] * fftmag[125] +
-               melcoe[252] * fftmag[126] + melcoe[253] * fftmag[127] +
-               melcoe[254] * fftmag[128] + melcoe[255] * fftmag[129] +
-               melcoe[256] * fftmag[130] + melcoe[257] * fftmag[131] +
-               melcoe[258] * fftmag[132];
-    dout[61] = melcoe[259] * fftmag[129] + melcoe[260] * fftmag[130] +
-               melcoe[261] * fftmag[131] + melcoe[262] * fftmag[132] +
-               melcoe[263] * fftmag[133] + melcoe[264] * fftmag[134] +
-               melcoe[265] * fftmag[135] + melcoe[266] * fftmag[136] +
-               melcoe[267] * fftmag[137];
-    dout[62] = melcoe[268] * fftmag[133] + melcoe[269] * fftmag[134] +
-               melcoe[270] * fftmag[135] + melcoe[271] * fftmag[136] +
-               melcoe[272] * fftmag[137] + melcoe[273] * fftmag[138] +
-               melcoe[274] * fftmag[139] + melcoe[275] * fftmag[140] +
-               melcoe[276] * fftmag[141] + melcoe[277] * fftmag[142];
-    dout[63] = melcoe[278] * fftmag[138] + melcoe[279] * fftmag[139] +
-               melcoe[280] * fftmag[140] + melcoe[281] * fftmag[141] +
-               melcoe[282] * fftmag[142] + melcoe[283] * fftmag[143] +
-               melcoe[284] * fftmag[144] + melcoe[285] * fftmag[145] +
-               melcoe[286] * fftmag[146] + melcoe[287] * fftmag[147];
-    dout[64] = melcoe[288] * fftmag[143] + melcoe[289] * fftmag[144] +
-               melcoe[290] * fftmag[145] + melcoe[291] * fftmag[146] +
-               melcoe[292] * fftmag[147] + melcoe[293] * fftmag[148] +
-               melcoe[294] * fftmag[149] + melcoe[295] * fftmag[150] +
-               melcoe[296] * fftmag[151] + melcoe[297] * fftmag[152] +
-               melcoe[298] * fftmag[153];
-    dout[65] = melcoe[299] * fftmag[148] + melcoe[300] * fftmag[149] +
-               melcoe[301] * fftmag[150] + melcoe[302] * fftmag[151] +
-               melcoe[303] * fftmag[152] + melcoe[304] * fftmag[153] +
-               melcoe[305] * fftmag[154] + melcoe[306] * fftmag[155] +
-               melcoe[307] * fftmag[156] + melcoe[308] * fftmag[157] +
-               melcoe[309] * fftmag[158];
-    dout[66] = melcoe[310] * fftmag[154] + melcoe[311] * fftmag[155] +
-               melcoe[312] * fftmag[156] + melcoe[313] * fftmag[157] +
-               melcoe[314] * fftmag[158] + melcoe[315] * fftmag[159] +
-               melcoe[316] * fftmag[160] + melcoe[317] * fftmag[161] +
-               melcoe[318] * fftmag[162] + melcoe[319] * fftmag[163] +
-               melcoe[320] * fftmag[164];
-    dout[67] = melcoe[321] * fftmag[159] + melcoe[322] * fftmag[160] +
-               melcoe[323] * fftmag[161] + melcoe[324] * fftmag[162] +
-               melcoe[325] * fftmag[163] + melcoe[326] * fftmag[164] +
-               melcoe[327] * fftmag[165] + melcoe[328] * fftmag[166] +
-               melcoe[329] * fftmag[167] + melcoe[330] * fftmag[168] +
-               melcoe[331] * fftmag[169] + melcoe[332] * fftmag[170];
-    dout[68] = melcoe[333] * fftmag[165] + melcoe[334] * fftmag[166] +
-               melcoe[335] * fftmag[167] + melcoe[336] * fftmag[168] +
-               melcoe[337] * fftmag[169] + melcoe[338] * fftmag[170] +
-               melcoe[339] * fftmag[171] + melcoe[340] * fftmag[172] +
-               melcoe[341] * fftmag[173] + melcoe[342] * fftmag[174] +
-               melcoe[343] * fftmag[175] + melcoe[344] * fftmag[176];
-    dout[69] = melcoe[345] * fftmag[171] + melcoe[346] * fftmag[172] +
-               melcoe[347] * fftmag[173] + melcoe[348] * fftmag[174] +
-               melcoe[349] * fftmag[175] + melcoe[350] * fftmag[176] +
-               melcoe[351] * fftmag[177] + melcoe[352] * fftmag[178] +
-               melcoe[353] * fftmag[179] + melcoe[354] * fftmag[180] +
-               melcoe[355] * fftmag[181] + melcoe[356] * fftmag[182];
-    dout[70] = melcoe[357] * fftmag[177] + melcoe[358] * fftmag[178] +
-               melcoe[359] * fftmag[179] + melcoe[360] * fftmag[180] +
-               melcoe[361] * fftmag[181] + melcoe[362] * fftmag[182] +
-               melcoe[363] * fftmag[183] + melcoe[364] * fftmag[184] +
-               melcoe[365] * fftmag[185] + melcoe[366] * fftmag[186] +
-               melcoe[367] * fftmag[187] + melcoe[368] * fftmag[188];
-    dout[71] = melcoe[369] * fftmag[183] + melcoe[370] * fftmag[184] +
-               melcoe[371] * fftmag[185] + melcoe[372] * fftmag[186] +
-               melcoe[373] * fftmag[187] + melcoe[374] * fftmag[188] +
-               melcoe[375] * fftmag[189] + melcoe[376] * fftmag[190] +
-               melcoe[377] * fftmag[191] + melcoe[378] * fftmag[192] +
-               melcoe[379] * fftmag[193] + melcoe[380] * fftmag[194] +
-               melcoe[381] * fftmag[195];
-    dout[72] = melcoe[382] * fftmag[189] + melcoe[383] * fftmag[190] +
-               melcoe[384] * fftmag[191] + melcoe[385] * fftmag[192] +
-               melcoe[386] * fftmag[193] + melcoe[387] * fftmag[194] +
-               melcoe[388] * fftmag[195] + melcoe[389] * fftmag[196] +
-               melcoe[390] * fftmag[197] + melcoe[391] * fftmag[198] +
-               melcoe[392] * fftmag[199] + melcoe[393] * fftmag[200] +
-               melcoe[394] * fftmag[201] + melcoe[395] * fftmag[202];
-    dout[73] = melcoe[396] * fftmag[196] + melcoe[397] * fftmag[197] +
-               melcoe[398] * fftmag[198] + melcoe[399] * fftmag[199] +
-               melcoe[400] * fftmag[200] + melcoe[401] * fftmag[201] +
-               melcoe[402] * fftmag[202] + melcoe[403] * fftmag[203] +
-               melcoe[404] * fftmag[204] + melcoe[405] * fftmag[205] +
-               melcoe[406] * fftmag[206] + melcoe[407] * fftmag[207] +
-               melcoe[408] * fftmag[208] + melcoe[409] * fftmag[209];
-    dout[74] = melcoe[410] * fftmag[203] + melcoe[411] * fftmag[204] +
-               melcoe[412] * fftmag[205] + melcoe[413] * fftmag[206] +
-               melcoe[414] * fftmag[207] + melcoe[415] * fftmag[208] +
-               melcoe[416] * fftmag[209] + melcoe[417] * fftmag[210] +
-               melcoe[418] * fftmag[211] + melcoe[419] * fftmag[212] +
-               melcoe[420] * fftmag[213] + melcoe[421] * fftmag[214] +
-               melcoe[422] * fftmag[215] + melcoe[423] * fftmag[216];
-    dout[75] = melcoe[424] * fftmag[210] + melcoe[425] * fftmag[211] +
-               melcoe[426] * fftmag[212] + melcoe[427] * fftmag[213] +
-               melcoe[428] * fftmag[214] + melcoe[429] * fftmag[215] +
-               melcoe[430] * fftmag[216] + melcoe[431] * fftmag[217] +
-               melcoe[432] * fftmag[218] + melcoe[433] * fftmag[219] +
-               melcoe[434] * fftmag[220] + melcoe[435] * fftmag[221] +
-               melcoe[436] * fftmag[222] + melcoe[437] * fftmag[223];
-    dout[76] = melcoe[438] * fftmag[217] + melcoe[439] * fftmag[218] +
-               melcoe[440] * fftmag[219] + melcoe[441] * fftmag[220] +
-               melcoe[442] * fftmag[221] + melcoe[443] * fftmag[222] +
-               melcoe[444] * fftmag[223] + melcoe[445] * fftmag[224] +
-               melcoe[446] * fftmag[225] + melcoe[447] * fftmag[226] +
-               melcoe[448] * fftmag[227] + melcoe[449] * fftmag[228] +
-               melcoe[450] * fftmag[229] + melcoe[451] * fftmag[230] +
-               melcoe[452] * fftmag[231];
-    dout[77] = melcoe[453] * fftmag[224] + melcoe[454] * fftmag[225] +
-               melcoe[455] * fftmag[226] + melcoe[456] * fftmag[227] +
-               melcoe[457] * fftmag[228] + melcoe[458] * fftmag[229] +
-               melcoe[459] * fftmag[230] + melcoe[460] * fftmag[231] +
-               melcoe[461] * fftmag[232] + melcoe[462] * fftmag[233] +
-               melcoe[463] * fftmag[234] + melcoe[464] * fftmag[235] +
-               melcoe[465] * fftmag[236] + melcoe[466] * fftmag[237] +
-               melcoe[467] * fftmag[238] + melcoe[468] * fftmag[239];
-    dout[78] = melcoe[469] * fftmag[232] + melcoe[470] * fftmag[233] +
-               melcoe[471] * fftmag[234] + melcoe[472] * fftmag[235] +
-               melcoe[473] * fftmag[236] + melcoe[474] * fftmag[237] +
-               melcoe[475] * fftmag[238] + melcoe[476] * fftmag[239] +
-               melcoe[477] * fftmag[240] + melcoe[478] * fftmag[241] +
-               melcoe[479] * fftmag[242] + melcoe[480] * fftmag[243] +
-               melcoe[481] * fftmag[244] + melcoe[482] * fftmag[245] +
-               melcoe[483] * fftmag[246] + melcoe[484] * fftmag[247];
-    dout[79] = melcoe[485] * fftmag[240] + melcoe[486] * fftmag[241] +
-               melcoe[487] * fftmag[242] + melcoe[488] * fftmag[243] +
-               melcoe[489] * fftmag[244] + melcoe[490] * fftmag[245] +
-               melcoe[491] * fftmag[246] + melcoe[492] * fftmag[247] +
-               melcoe[493] * fftmag[248] + melcoe[494] * fftmag[249] +
-               melcoe[495] * fftmag[250] + melcoe[496] * fftmag[251] +
-               melcoe[497] * fftmag[252] + melcoe[498] * fftmag[253] +
-               melcoe[499] * fftmag[254] + melcoe[500] * fftmag[255];
-    global_cmvn(dout);
-}
diff --git a/funasr/runtime/onnxruntime/src/FeatureExtract.h b/funasr/runtime/onnxruntime/src/FeatureExtract.h
deleted file mode 100644
index 8296253..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureExtract.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#ifndef FEATUREEXTRACT_H
-#define FEATUREEXTRACT_H
-
-#include <fftw3.h>
-#include <stdint.h>
-
-#include "FeatureQueue.h"
-#include "SpeechWrap.h"
-#include "Tensor.h"
-
-class FeatureExtract {
-  private:
-    SpeechWrap speech;
-    FeatureQueue fqueue;
-    int mode;
-    int fft_size = 512;
-    int window_size = 400;
-    int window_shift = 160;
-
-    //void fftw_init();
-    void melspect(float *din, float *dout);
-    void global_cmvn(float *din);
-
-  public:
-    FeatureExtract(int mode);
-    ~FeatureExtract();
-    int size();
-    //int status();
-    void reset();
-    void insert(fftwf_plan plan, float *din, int len, int flag);
-    bool fetch(Tensor<float> *&dout);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/FeatureQueue.cpp b/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
deleted file mode 100644
index f07633b..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureQueue.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-#include "precomp.h"
-FeatureQueue::FeatureQueue()
-{
-    buff = new Tensor<float>(67, 80);
-    window_size = 67;
-    buff_idx = 0;
-}
-
-FeatureQueue::~FeatureQueue()
-{
-    delete buff;
-}
-
-void FeatureQueue::reinit(int size)
-{
-    delete buff;
-    buff = new Tensor<float>(size, 80);
-    buff_idx = 0;
-    window_size = size;
-}
-
-void FeatureQueue::reset()
-{
-    buff_idx = 0;
-}
-
-void FeatureQueue::push(float *din, int flag)
-{
-    int offset = buff_idx * 80;
-    memcpy(buff->buff + offset, din, 80 * sizeof(float));
-    buff_idx++;
-
-    if (flag == S_END) {
-        Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
-        memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
-        feature_queue.push(tmp);
-        buff_idx = 0;
-    } else if (buff_idx == window_size) {
-        feature_queue.push(buff);
-        Tensor<float> *tmp = new Tensor<float>(window_size, 80);
-        memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
-               3 * 80 * sizeof(float));
-        buff_idx = 3;
-        buff = tmp;
-    }
-}
-
-Tensor<float> *FeatureQueue::pop()
-{
-
-    Tensor<float> *tmp = feature_queue.front();
-    feature_queue.pop();
-    return tmp;
-}
-
-int FeatureQueue::size()
-{
-    return feature_queue.size();
-}
diff --git a/funasr/runtime/onnxruntime/src/FeatureQueue.h b/funasr/runtime/onnxruntime/src/FeatureQueue.h
deleted file mode 100644
index be3360b..0000000
--- a/funasr/runtime/onnxruntime/src/FeatureQueue.h
+++ /dev/null
@@ -1,28 +0,0 @@
-
-#ifndef FEATUREQUEUE_H
-#define FEATUREQUEUE_H
-
-#include "Tensor.h"
-#include <queue>
-#include <stdint.h>
-using namespace std;
-
-
-class FeatureQueue {
-  private:
-    queue<Tensor<float> *> feature_queue;
-    Tensor<float> *buff;
-    int buff_idx;
-    int window_size;
-
-  public:
-    FeatureQueue();
-    ~FeatureQueue();
-    void reinit(int size);
-    void reset();
-    void push(float *din, int flag);
-    Tensor<float> *pop();
-    int size();
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/Model.cpp b/funasr/runtime/onnxruntime/src/Model.cpp
deleted file mode 100644
index 7ddb635..0000000
--- a/funasr/runtime/onnxruntime/src/Model.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include "precomp.h"
-
-Model *create_model(const char *path, int nThread, bool quantize)
-{
-    Model *mm;
-
-    mm = new paraformer::ModelImp(path, nThread, quantize);
-
-    return mm;
-}
diff --git a/funasr/runtime/onnxruntime/src/SpeechWrap.cpp b/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
deleted file mode 100644
index 60d0a2b..0000000
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "precomp.h"
-
-SpeechWrap::SpeechWrap()
-{
-    cache_size = 0;
-}
-
-SpeechWrap::~SpeechWrap()
-{
-}
-
-void SpeechWrap::reset()
-{
-    cache_size = 0;
-}
-
-void SpeechWrap::load(float *din, int len)
-{
-    in = din;
-    in_size = len;
-    total_size = cache_size + in_size;
-}
-
-int SpeechWrap::size()
-{
-    return total_size;
-}
-
-void SpeechWrap::update(int offset)
-{
-    int in_offset = offset - cache_size;
-    cache_size = (total_size - offset);
-    memcpy(cache, in + in_offset, cache_size * sizeof(float));
-}
-
-float &SpeechWrap::operator[](int i)
-{
-    return i < cache_size ? cache[i] : in[i - cache_size];
-}
diff --git a/funasr/runtime/onnxruntime/src/SpeechWrap.h b/funasr/runtime/onnxruntime/src/SpeechWrap.h
deleted file mode 100644
index 5d3ee40..0000000
--- a/funasr/runtime/onnxruntime/src/SpeechWrap.h
+++ /dev/null
@@ -1,26 +0,0 @@
-
-#ifndef SPEECHWRAP_H
-#define SPEECHWRAP_H
-
-#include <stdint.h>
-
-class SpeechWrap {
-  private:
-    float cache[400];
-    int cache_size;
-    float *in;
-    int in_size;
-    int total_size;
-    int next_cache_size;
-
-  public:
-    SpeechWrap();
-    ~SpeechWrap();
-    void load(float *din, int len);
-    void update(int offset);
-    void reset();
-    int size();
-    float &operator[](int i);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/Vocab.h b/funasr/runtime/onnxruntime/src/Vocab.h
deleted file mode 100644
index 023671a..0000000
--- a/funasr/runtime/onnxruntime/src/Vocab.h
+++ /dev/null
@@ -1,25 +0,0 @@
-
-#ifndef VOCAB_H
-#define VOCAB_H
-
-#include <stdint.h>
-#include <string>
-#include <vector>
-using namespace std;
-
-class Vocab {
-  private:
-    vector<string> vocab;
-    bool isChinese(string ch);
-    bool isEnglish(string ch);
-    void loadVocabFromYaml(const char* filename);
-
-  public:
-    Vocab(const char *filename);
-    ~Vocab();
-    int size();
-    string vector2string(vector<int> in);
-    string vector2stringV2(vector<int> in);
-};
-
-#endif
diff --git a/funasr/runtime/onnxruntime/src/alignedmem.cpp b/funasr/runtime/onnxruntime/src/alignedmem.cpp
index e174afe..d3e4b82 100644
--- a/funasr/runtime/onnxruntime/src/alignedmem.cpp
+++ b/funasr/runtime/onnxruntime/src/alignedmem.cpp
@@ -1,5 +1,5 @@
 #include "precomp.h"
-void *aligned_malloc(size_t alignment, size_t required_bytes)
+void *AlignedMalloc(size_t alignment, size_t required_bytes)
 {
     void *p1;  // original block
     void **p2; // aligned block
@@ -12,7 +12,7 @@
     return p2;
 }
 
-void aligned_free(void *p)
+void AlignedFree(void *p)
 {
     free(((void **)p)[-1]);
 }
diff --git a/funasr/runtime/onnxruntime/src/alignedmem.h b/funasr/runtime/onnxruntime/src/alignedmem.h
index dca68f4..e2b640a 100644
--- a/funasr/runtime/onnxruntime/src/alignedmem.h
+++ b/funasr/runtime/onnxruntime/src/alignedmem.h
@@ -2,9 +2,7 @@
 #ifndef ALIGNEDMEM_H
 #define ALIGNEDMEM_H
 
-
-
-extern void *aligned_malloc(size_t alignment, size_t required_bytes);
-extern void aligned_free(void *p);
+extern void *AlignedMalloc(size_t alignment, size_t required_bytes);
+extern void AlignedFree(void *p);
 
 #endif
diff --git a/funasr/runtime/onnxruntime/src/Audio.cpp b/funasr/runtime/onnxruntime/src/audio.cpp
similarity index 76%
rename from funasr/runtime/onnxruntime/src/Audio.cpp
rename to funasr/runtime/onnxruntime/src/audio.cpp
index 38b6de8..ef48fa1 100644
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/audio.cpp
@@ -6,7 +6,7 @@
 #include <fstream>
 #include <assert.h>
 
-#include "Audio.h"
+#include "audio.h"
 #include "precomp.h"
 
 using namespace std;
@@ -128,39 +128,30 @@
     start = 0;
 };
 AudioFrame::~AudioFrame(){};
-int AudioFrame::set_start(int val)
+int AudioFrame::SetStart(int val)
 {
     start = val < 0 ? 0 : val;
     return start;
 };
 
-int AudioFrame::set_end(int val, int max_len)
+int AudioFrame::SetEnd(int val)
 {
-
-    float num_samples = val - start;
-    float frame_length = 400;
-    float frame_shift = 160;
-    float num_new_samples =
-        ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;
-
-    end = start + num_new_samples;
-    len = (int)num_new_samples;
-    if (end > max_len)
-        printf("frame end > max_len!!!!!!!\n");
+    end = val;
+    len = end - start;
     return end;
 };
 
-int AudioFrame::get_start()
+int AudioFrame::GetStart()
 {
     return start;
 };
 
-int AudioFrame::get_len()
+int AudioFrame::GetLen()
 {
     return len;
 };
 
-int AudioFrame::disp()
+int AudioFrame::Disp()
 {
     printf("not imp!!!!\n");
 
@@ -194,27 +185,27 @@
     }
 }
 
-void Audio::disp()
+void Audio::Disp()
 {
-    printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
+    printf("Audio time is %f s. len is %d\n", (float)speech_len / MODEL_SAMPLE_RATE,
            speech_len);
 }
 
-float Audio::get_time_len()
+float Audio::GetTimeLen()
 {
-    return (float)speech_len / model_sample_rate;
+    return (float)speech_len / MODEL_SAMPLE_RATE;
 }
 
-void Audio::wavResample(int32_t sampling_rate, const float *waveform,
+void Audio::WavResample(int32_t sampling_rate, const float *waveform,
                           int32_t n)
 {
     printf(
           "Creating a resampler:\n"
           "   in_sample_rate: %d\n"
           "   output_sample_rate: %d\n",
-          sampling_rate, static_cast<int32_t>(model_sample_rate));
+          sampling_rate, static_cast<int32_t>(MODEL_SAMPLE_RATE));
     float min_freq =
-        std::min<int32_t>(sampling_rate, model_sample_rate);
+        std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
     float lowpass_cutoff = 0.99 * 0.5 * min_freq;
 
     int32_t lowpass_filter_width = 6;
@@ -222,7 +213,7 @@
     //auto resampler = new LinearResample(
     //      sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
     auto resampler = std::make_unique<LinearResample>(
-          sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
+          sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
     std::vector<float> samples;
     resampler->Resample(waveform, n, true, &samples);
     //reset speech_data
@@ -235,7 +226,7 @@
     copy(samples.begin(), samples.end(), speech_data);
 }
 
-bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
+bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
 {
     WaveHeader header;
     if (speech_data != NULL) {
@@ -279,8 +270,8 @@
         }
 
         //resample
-        if(*sampling_rate != model_sample_rate){
-            wavResample(*sampling_rate, speech_data, speech_len);
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
+            WavResample(*sampling_rate, speech_data, speech_len);
         }
 
         AudioFrame* frame = new AudioFrame(speech_len);
@@ -292,7 +283,7 @@
         return false;
 }
 
-bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
+bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
 {
     WaveHeader header;
     if (speech_data != NULL) {
@@ -326,8 +317,8 @@
         }
         
         //resample
-        if(*sampling_rate != model_sample_rate){
-            wavResample(*sampling_rate, speech_data, speech_len);
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
+            WavResample(*sampling_rate, speech_data, speech_len);
         }
 
         AudioFrame* frame = new AudioFrame(speech_len);
@@ -339,7 +330,7 @@
         return false;
 }
 
-bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
 {
     if (speech_data != NULL) {
         free(speech_data);
@@ -349,7 +340,7 @@
     }
     offset = 0;
 
-    speech_len = nBufLen / 2;
+    speech_len = n_buf_len / 2;
     speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
     if (speech_buff)
     {
@@ -369,8 +360,8 @@
         }
         
         //resample
-        if(*sampling_rate != model_sample_rate){
-            wavResample(*sampling_rate, speech_data, speech_len);
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
+            WavResample(*sampling_rate, speech_data, speech_len);
         }
 
         AudioFrame* frame = new AudioFrame(speech_len);
@@ -382,7 +373,7 @@
         return false;
 }
 
-bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
 {
     if (speech_data != NULL) {
         free(speech_data);
@@ -397,10 +388,10 @@
     if (fp == nullptr)
         return false;
     fseek(fp, 0, SEEK_END);
-    uint32_t nFileLen = ftell(fp);
+    uint32_t n_file_len = ftell(fp);
     fseek(fp, 0, SEEK_SET);
 
-    speech_len = (nFileLen) / 2;
+    speech_len = (n_file_len) / 2;
     speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
     if (speech_buff)
     {
@@ -420,8 +411,8 @@
         }
 
         //resample
-        if(*sampling_rate != model_sample_rate){
-            wavResample(*sampling_rate, speech_data, speech_len);
+        if(*sampling_rate != MODEL_SAMPLE_RATE){
+            WavResample(*sampling_rate, speech_data, speech_len);
         }
 
         AudioFrame* frame = new AudioFrame(speech_len);
@@ -434,7 +425,7 @@
 
 }
 
-int Audio::fetch_chunck(float *&dout, int len)
+int Audio::FetchChunck(float *&dout, int len)
 {
     if (offset >= speech_align_len) {
         dout = NULL;
@@ -455,14 +446,14 @@
     }
 }
 
-int Audio::fetch(float *&dout, int &len, int &flag)
+int Audio::Fetch(float *&dout, int &len, int &flag)
 {
     if (frame_queue.size() > 0) {
         AudioFrame *frame = frame_queue.front();
         frame_queue.pop();
 
-        dout = speech_data + frame->get_start();
-        len = frame->get_len();
+        dout = speech_data + frame->GetStart();
+        len = frame->GetLen();
         delete frame;
         flag = S_END;
         return 1;
@@ -471,9 +462,8 @@
     }
 }
 
-void Audio::padding()
+void Audio::Padding()
 {
-
     float num_samples = speech_len;
     float frame_length = 400;
     float frame_shift = 160;
@@ -509,71 +499,27 @@
     delete frame;
 }
 
-#define UNTRIGGERED 0
-#define TRIGGERED   1
-
-#define SPEECH_LEN_5S  (16000 * 5)
-#define SPEECH_LEN_10S (16000 * 10)
-#define SPEECH_LEN_20S (16000 * 20)
-#define SPEECH_LEN_30S (16000 * 30)
-
-/*
-void Audio::split()
+void Audio::Split(Model* recog_obj)
 {
-    VadInst *handle = WebRtcVad_Create();
-    WebRtcVad_Init(handle);
-    WebRtcVad_set_mode(handle, 2);
-    int window_size = 10;
-    AudioWindow audiowindow(window_size);
-    int status = UNTRIGGERED;
-    int offset = 0;
-    int fs = 16000;
-    int step = 480;
-
     AudioFrame *frame;
 
     frame = frame_queue.front();
     frame_queue.pop();
+    int sp_len = frame->GetLen();
     delete frame;
     frame = NULL;
 
-    while (offset < speech_len - step) {
-        int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step);
-        if (status == UNTRIGGERED && audiowindow.put(n) >= window_size - 1) {
-            frame = new AudioFrame();
-            int start = offset - step * (window_size - 1);
-            frame->set_start(start);
-            status = TRIGGERED;
-        } else if (status == TRIGGERED) {
-            int win_weight = audiowindow.put(n);
-            int voice_len = (offset - frame->get_start());
-            int gap = 0;
-            if (voice_len < SPEECH_LEN_5S) {
-                offset += step;
-                continue;
-            } else if (voice_len < SPEECH_LEN_10S) {
-                gap = 1;
-            } else if (voice_len < SPEECH_LEN_20S) {
-                gap = window_size / 5;
-            } else {
-                gap = window_size / 2;
-            }
-
-            if (win_weight < gap) {
-                status = UNTRIGGERED;
-                offset = frame->set_end(offset, speech_align_len);
-                frame_queue.push(frame);
-                frame = NULL;
-            }
-        }
-        offset += step;
-    }
-
-    if (frame != NULL) {
-        frame->set_end(speech_len, speech_align_len);
+    std::vector<float> pcm_data(speech_data, speech_data+sp_len);
+    vector<std::vector<int>> vad_segments = recog_obj->VadSeg(pcm_data);
+    int seg_sample = MODEL_SAMPLE_RATE/1000;
+    for(vector<int> segment:vad_segments)
+    {
+        frame = new AudioFrame();
+        int start = segment[0]*seg_sample;
+        int end = segment[1]*seg_sample;
+        frame->SetStart(start);
+        frame->SetEnd(end);
         frame_queue.push(frame);
         frame = NULL;
     }
-    WebRtcVad_Free(handle);
 }
-*/
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/CommonStruct.h b/funasr/runtime/onnxruntime/src/common-struct.h
similarity index 100%
rename from funasr/runtime/onnxruntime/src/CommonStruct.h
rename to funasr/runtime/onnxruntime/src/common-struct.h
diff --git a/funasr/runtime/onnxruntime/src/commonfunc.h b/funasr/runtime/onnxruntime/src/commonfunc.h
index 5198030..fbbda74 100644
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@@ -1,6 +1,5 @@
 #pragma once 
-
-
+#include <algorithm>
 typedef struct
 {
     std::string msg;
@@ -11,46 +10,45 @@
 #ifdef _WIN32
 #include <codecvt>
 
-
-
-inline std::wstring string2wstring(const std::string& str, const std::string& locale)
+inline std::wstring String2wstring(const std::string& str, const std::string& locale)
 {
     typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
     std::wstring_convert<F> strCnv(new F(locale));
     return strCnv.from_bytes(str);
 }
 
-inline std::wstring  strToWstr(std::string str) {
+inline std::wstring  StrToWstr(std::string str) {
     if (str.length() == 0)
         return L"";
-    return  string2wstring(str, "zh-CN");
+    return  String2wstring(str, "zh-CN");
 
 }
 
 #endif
 
-
-
-inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) {
+inline void GetInputName(Ort::Session* session, string& inputName,int nIndex=0) {
     size_t numInputNodes = session->GetInputCount();
     if (numInputNodes > 0) {
         Ort::AllocatorWithDefaultOptions allocator;
         {
             auto t = session->GetInputNameAllocated(nIndex, allocator);
             inputName = t.get();
-
         }
     }
 }
 
-inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
+inline void GetOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
     size_t numOutputNodes = session->GetOutputCount();
     if (numOutputNodes > 0) {
         Ort::AllocatorWithDefaultOptions allocator;
         {
             auto t = session->GetOutputNameAllocated(nIndex, allocator);
             outputName = t.get();
-
         }
     }
 }
+
+template <class ForwardIterator>
+inline static size_t Argmax(ForwardIterator first, ForwardIterator last) {
+    return std::distance(first, std::max_element(first, last));
+}
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.cpp b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
new file mode 100644
index 0000000..3d66dcd
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@@ -0,0 +1,188 @@
+#include "precomp.h"
+
+// Creates the ONNX session for the punctuation model in sz_model_dir and opens its YAML file; rethrows on load failure.
+CTTransformer::CTTransformer(const char* sz_model_dir, int thread_num)
+:env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options{}
+{
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    session_options.DisableCpuMemArena();
+
+    string strModelPath = PathAppend(sz_model_dir, PUNC_MODEL_FILE);
+    string strYamlPath = PathAppend(sz_model_dir, PUNC_YAML_FILE);
+    try {
+#ifdef _WIN32
+        std::wstring detPath = StrToWstr(strModelPath); // helper name as declared in commonfunc.h
+        m_session = std::make_unique<Ort::Session>(env_, detPath.c_str(), session_options);
+#else
+        m_session = std::make_unique<Ort::Session>(env_, strModelPath.c_str(), session_options);
+#endif
+    }
+    catch (std::exception const &e)
+    {
+        printf("%s", e.what()); // never use e.what() itself as the format string
+        throw;                  // without a session the name queries below would dereference null
+    }
+    // Read the names of the two model inputs and of the single output.
+    string strName;
+    GetInputName(m_session.get(), strName);
+    m_strInputNames.push_back(strName);
+    GetInputName(m_session.get(), strName, 1);
+    m_strInputNames.push_back(strName);
+    GetOutputName(m_session.get(), strName);
+    m_strOutputNames.push_back(strName);
+
+    // Expose the names as C strings; the pointers refer into the string vectors filled above.
+    for (auto& item : m_strInputNames)
+        m_szInputNames.push_back(item.c_str());
+    for (auto& item : m_strOutputNames)
+        m_szOutputNames.push_back(item.c_str());
+    m_tokenizer.OpenYaml(strYamlPath.c_str());
+}
+
+CTTransformer::~CTTransformer()
+{
+}
+
+// Tokenizes sz_input, predicts punctuation per TOKEN_LEN-sized mini-sentence via Infer() and returns the
+// punctuated text. For non-final mini-sentences, tokens after the last sentence end are carried to the next one.
+string CTTransformer::AddPunc(const char* sz_input)
+{
+    string strResult;
+    vector<string> strOut;
+    vector<int> InputData;
+    m_tokenizer.Tokenize(sz_input, strOut, InputData);
+
+    int nTotalBatch = ceil((float)InputData.size() / TOKEN_LEN);
+    int nCurBatch = -1;
+    int nSentEnd = -1, nLastCommaIndex = -1;
+    vector<int64_t> RemainIDs;  // token ids carried over to the next mini-sentence
+    vector<string> RemainStr;   // token strings carried over to the next mini-sentence
+    vector<int> NewPunctuation; // punctuation ids accumulated so far
+    vector<string> NewString;   // words and punctuation marks accumulated so far
+    vector<string> NewSentenceOut;
+    vector<int> NewPuncOut;
+    int nDiff = 0;
+    for (size_t i = 0; i < InputData.size(); i += TOKEN_LEN)
+    {
+        nDiff = (i + TOKEN_LEN) < InputData.size() ? (0) : (i + TOKEN_LEN - InputData.size());
+        vector<int64_t> InputIDs(InputData.begin() + i, InputData.begin() + i + TOKEN_LEN - nDiff);
+        vector<string> InputStr(strOut.begin() + i, strOut.begin() + i + TOKEN_LEN - nDiff);
+        InputIDs.insert(InputIDs.begin(), RemainIDs.begin(), RemainIDs.end()); // RemainIDs+InputIDs;
+        InputStr.insert(InputStr.begin(), RemainStr.begin(), RemainStr.end()); // RemainStr+InputStr;
+        auto Punction = Infer(InputIDs);
+        // Infer() returns a short (possibly empty) vector when the model run throws; pad so indexing stays in bounds.
+        if (Punction.size() < InputStr.size())
+            Punction.resize(InputStr.size(), NOTPUNC_INDEX);
+        nCurBatch = i / TOKEN_LEN;
+        if (nCurBatch < nTotalBatch - 1) // not the last mini-sentence
+        {
+            nSentEnd = -1;
+            nLastCommaIndex = -1;
+            for (int nIndex = (int)Punction.size() - 2; nIndex > 0; nIndex--)
+            {
+                if (m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(PERIOD_INDEX) || m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(QUESTION_INDEX))
+                {
+                    nSentEnd = nIndex;
+                    break;
+                }
+                if (nLastCommaIndex < 0 && m_tokenizer.Id2Punc(Punction[nIndex]) == m_tokenizer.Id2Punc(COMMA_INDEX))
+                {
+                    nLastCommaIndex = nIndex;
+                }
+            }
+            if (nSentEnd < 0 && InputStr.size() > CACHE_POP_TRIGGER_LIMIT && nLastCommaIndex > 0)
+            {
+                nSentEnd = nLastCommaIndex;
+                Punction[nSentEnd] = PERIOD_INDEX;
+            }
+            RemainStr.assign(InputStr.begin() + nSentEnd + 1, InputStr.end());
+            RemainIDs.assign(InputIDs.begin() + nSentEnd + 1, InputIDs.end());
+            InputStr.resize(nSentEnd + 1);  // keep only the finished mini-sentence (no self-referencing assign)
+            Punction.resize(nSentEnd + 1);
+        }
+
+        NewPunctuation.insert(NewPunctuation.end(), Punction.begin(), Punction.end());
+        vector<string> WordWithPunc;
+        for (size_t k = 0; k < InputStr.size(); k++)
+        {
+            // A non-first ASCII token followed by another ASCII token gets a trailing space (English words).
+            if (k > 0 && !(InputStr[k][0] & 0x80) && (k + 1) < InputStr.size() && !(InputStr[k+1][0] & 0x80))
+            {
+                InputStr[k] = InputStr[k]+ " ";
+            }
+            WordWithPunc.push_back(InputStr[k]);
+            if (Punction[k] != NOTPUNC_INDEX) // emit a mark only for classes other than "no punctuation"
+            {
+                WordWithPunc.push_back(m_tokenizer.Id2Punc(Punction[k]));
+            }
+        }
+        NewString.insert(NewString.end(), WordWithPunc.begin(), WordWithPunc.end()); // new_mini_sentence += "".join(words_with_punc)
+        NewSentenceOut = NewString;
+        NewPuncOut = NewPunctuation;
+        // The last mini-sentence must end with a period or question mark.
+        if(nCurBatch == nTotalBatch - 1 && !NewString.empty())
+        {
+            const string& strLast = NewString.back();
+            if (strLast == m_tokenizer.Id2Punc(COMMA_INDEX) || strLast == m_tokenizer.Id2Punc(DUN_INDEX))
+            {
+                NewSentenceOut.back() = m_tokenizer.Id2Punc(PERIOD_INDEX); // replace the trailing comma/dun
+                NewPuncOut.back() = PERIOD_INDEX;
+            }
+            else if (strLast != m_tokenizer.Id2Punc(PERIOD_INDEX) && strLast != m_tokenizer.Id2Punc(QUESTION_INDEX))
+            {
+                NewSentenceOut.push_back(m_tokenizer.Id2Punc(PERIOD_INDEX)); // no final mark yet: append a period
+                NewPuncOut.push_back(PERIOD_INDEX);
+            }
+        }
+    }
+    for (auto& item : NewSentenceOut)
+        strResult += item;
+    return strResult;
+}
+
+// Runs the punctuation model on one sequence of token ids and returns one class index per CANDIDATE_NUM-sized
+// row of the float output. If the run throws, the message is printed and the indices collected so far are returned.
+vector<int> CTTransformer::Infer(vector<int64_t> input_data)
+{
+    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+    vector<int> punction;
+    std::array<int64_t, 2> input_shape_{ 1, (int64_t)input_data.size()};
+    Ort::Value onnx_input = Ort::Value::CreateTensor<int64_t>(m_memoryInfo,
+        input_data.data(),
+        input_data.size(),
+        input_shape_.data(),
+        input_shape_.size());
+    std::array<int32_t,1> text_lengths{ (int32_t)input_data.size() };
+    std::array<int64_t,1> text_lengths_dim{ 1 };
+    Ort::Value onnx_text_lengths = Ort::Value::CreateTensor(
+        m_memoryInfo,
+        text_lengths.data(),
+        text_lengths.size() * sizeof(int32_t),
+        text_lengths_dim.data(),
+        text_lengths_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_input));
+    input_onnx.emplace_back(std::move(onnx_text_lengths));
+
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+        int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), int64_t{1}, std::multiplies<int64_t>());
+        float * floatData = outputTensor[0].GetTensorMutableData<float>();
+        for (int64_t i = 0; i + CANDIDATE_NUM <= outputCount; i += CANDIDATE_NUM)
+        {
+            // Argmax scans the half-open range [first, last), so last must be one past the final candidate.
+            int index = Argmax(floatData + i, floatData + i + CANDIDATE_NUM);
+            punction.push_back(index);
+        }
+    }
+    catch (std::exception const &e)
+    {
+        printf("%s", e.what()); // never use e.what() itself as the format string
+    }
+    return punction;
+}
+
+
+
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.h b/funasr/runtime/onnxruntime/src/ct-transformer.h
new file mode 100644
index 0000000..77972c7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.h
@@ -0,0 +1,26 @@
+#pragma once 
+
+class CTTransformer {
+/**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
+ * https://arxiv.org/pdf/2003.01309.pdf
+*/
+private:
+    CTokenizer m_tokenizer;
+    vector<string> m_strInputNames, m_strOutputNames;  // model node names, owned as strings
+    vector<const char*> m_szInputNames;   // c_str() pointers into m_strInputNames, passed to Run()
+    vector<const char*> m_szOutputNames;  // c_str() pointers into m_strOutputNames, passed to Run()
+
+    // Members are destroyed in reverse declaration order: env_ and session_options are declared
+    // before m_session so that the session is released before the environment it was created with.
+    Ort::Env env_;
+    Ort::SessionOptions session_options;
+    std::shared_ptr<Ort::Session> m_session;
+
+public:
+    CTTransformer(const char* sz_model_dir, int thread_num);
+    ~CTTransformer();
+    vector<int>  Infer(vector<int64_t> input_data);  // predicted class index per output row
+    string AddPunc(const char* sz_input);            // sz_input with punctuation marks inserted
+};
diff --git a/funasr/runtime/onnxruntime/src/e2e-vad.h b/funasr/runtime/onnxruntime/src/e2e-vad.h
new file mode 100644
index 0000000..e029dc3
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/e2e-vad.h
@@ -0,0 +1,797 @@
+
+#include <utility>
+#include <vector>
+#include <string>
+#include <map>
+#include <cmath>
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <cassert>
+
+
+enum class VadStateMachine {
+    kVadInStateStartPointNotDetected = 1,
+    kVadInStateInSpeechSegment = 2,
+    kVadInStateEndPointDetected = 3
+};
+
+enum class FrameState {
+    kFrameStateInvalid = -1,
+    kFrameStateSpeech = 1,
+    kFrameStateSil = 0
+};
+
+// final voice/unvoice state per frame
+enum class AudioChangeState {
+    kChangeStateSpeech2Speech = 0,
+    kChangeStateSpeech2Sil = 1,
+    kChangeStateSil2Sil = 2,
+    kChangeStateSil2Speech = 3,
+    kChangeStateNoBegin = 4,
+    kChangeStateInvalid = 5
+};
+
+enum class VadDetectMode {
+    kVadSingleUtteranceDetectMode = 0,
+    kVadMutipleUtteranceDetectMode = 1
+};
+
+class VADXOptions {
+public:
+    int sample_rate;
+    int detect_mode;
+    int snr_mode;
+    int max_end_silence_time;
+    int max_start_silence_time;
+    bool do_start_point_detection;
+    bool do_end_point_detection;
+    int window_size_ms;
+    int sil_to_speech_time_thres;
+    int speech_to_sil_time_thres;
+    float speech_2_noise_ratio;
+    int do_extend;
+    int lookback_time_start_point;
+    int lookahead_time_end_point;
+    int max_single_segment_time;
+    int nn_eval_block_size;
+    int dcd_block_size;
+    float snr_thres;
+    int noise_frame_num_used_for_snr;
+    float decibel_thres;
+    float speech_noise_thres;
+    float fe_prior_thres;
+    int silence_pdf_num;
+    std::vector<int> sil_pdf_ids;
+    float speech_noise_thresh_low;
+    float speech_noise_thresh_high;
+    bool output_frame_probs;
+    int frame_in_ms;
+    int frame_length_ms;
+
+    explicit VADXOptions(
+            int sr = 16000,
+            int dm = static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode),
+            int sm = 0,
+            int mset = 800,
+            int msst = 3000,
+            bool dspd = true,
+            bool depd = true,
+            int wsm = 200,
+            int ststh = 150,
+            int sttsh = 150,
+            float s2nr = 1.0,
+            int de = 1,
+            int lbtps = 200,
+            int latsp = 100,
+            int mss = 15000,
+            int nebs = 8,
+            int dbs = 4,
+            float st = -100.0,
+            int nfnus = 100,
+            float dt = -100.0,
+            float snt = 0.9,
+            float fept = 1e-4,
+            int spn = 1,
+            std::vector<int> spids = {0},
+            float sntl = -0.1,
+            float snth = 0.3,
+            bool ofp = false,
+            int fim = 10,
+            int flm = 25
+    ) :
+            sample_rate(sr),
+            detect_mode(dm),
+            snr_mode(sm),
+            max_end_silence_time(mset),
+            max_start_silence_time(msst),
+            do_start_point_detection(dspd),
+            do_end_point_detection(depd),
+            window_size_ms(wsm),
+            sil_to_speech_time_thres(ststh),
+            speech_to_sil_time_thres(sttsh),
+            speech_2_noise_ratio(s2nr),
+            do_extend(de),
+            lookback_time_start_point(lbtps),
+            lookahead_time_end_point(latsp),
+            max_single_segment_time(mss),
+            nn_eval_block_size(nebs),
+            dcd_block_size(dbs),
+            snr_thres(st),
+            noise_frame_num_used_for_snr(nfnus),
+            decibel_thres(dt),
+            speech_noise_thres(snt),
+            fe_prior_thres(fept),
+            silence_pdf_num(spn),
+            sil_pdf_ids(std::move(spids)),
+            speech_noise_thresh_low(sntl),
+            speech_noise_thresh_high(snth),
+            output_frame_probs(ofp),
+            frame_in_ms(fim),
+            frame_length_ms(flm) {}
+};
+
+class E2EVadSpeechBufWithDoa {
+public:
+    int start_ms;
+    int end_ms;
+    std::vector<float> buffer;
+    bool contain_seg_start_point;
+    bool contain_seg_end_point;
+    int doa;
+
+    E2EVadSpeechBufWithDoa() :
+            start_ms(0),
+            end_ms(0),
+            buffer(),
+            contain_seg_start_point(false),
+            contain_seg_end_point(false),
+            doa(0) {}
+
+    void Reset() {
+        start_ms = 0;
+        end_ms = 0;
+        buffer.clear();
+        contain_seg_start_point = false;
+        contain_seg_end_point = false;
+        doa = 0;
+    }
+};
+
+class E2EVadFrameProb {
+public:
+    double noise_prob;
+    double speech_prob;
+    double score;
+    int frame_id;
+    int frm_state;
+
+    E2EVadFrameProb() :
+            noise_prob(0.0),
+            speech_prob(0.0),
+            score(0.0),
+            frame_id(0),
+            frm_state(0) {}
+};
+
+class WindowDetector {
+public:
+    int window_size_ms;
+    int sil_to_speech_time;
+    int speech_to_sil_time;
+    int frame_size_ms;
+    int win_size_frame;
+    int win_sum;
+    std::vector<int> win_state;
+    int cur_win_pos;
+    FrameState pre_frame_state;
+    FrameState cur_frame_state;
+    int sil_to_speech_frmcnt_thres;
+    int speech_to_sil_frmcnt_thres;
+    int voice_last_frame_count;
+    int noise_last_frame_count;
+    int hydre_frame_count;
+
+    WindowDetector(int window_size_ms, int sil_to_speech_time, int speech_to_sil_time, int frame_size_ms) :
+            window_size_ms(window_size_ms),
+            sil_to_speech_time(sil_to_speech_time),
+            speech_to_sil_time(speech_to_sil_time),
+            frame_size_ms(frame_size_ms),
+            win_size_frame(window_size_ms / frame_size_ms),
+            win_sum(0),
+            win_state(std::vector<int>(win_size_frame, 0)),
+            cur_win_pos(0),
+            pre_frame_state(FrameState::kFrameStateSil),
+            cur_frame_state(FrameState::kFrameStateSil),
+            sil_to_speech_frmcnt_thres(sil_to_speech_time / frame_size_ms),
+            speech_to_sil_frmcnt_thres(speech_to_sil_time / frame_size_ms),
+            voice_last_frame_count(0),
+            noise_last_frame_count(0),
+            hydre_frame_count(0) {}
+
+    void Reset() {
+        cur_win_pos = 0;
+        win_sum = 0;
+        win_state = std::vector<int>(win_size_frame, 0);
+        pre_frame_state = FrameState::kFrameStateSil;
+        cur_frame_state = FrameState::kFrameStateSil;
+        voice_last_frame_count = 0;
+        noise_last_frame_count = 0;
+        hydre_frame_count = 0;
+    }
+
+    int GetWinSize() {
+        return win_size_frame;
+    }
+
+    AudioChangeState DetectOneFrame(FrameState frameState, int frame_count) {
+        // Turn the frame label into a 0/1 vote; anything but speech/silence is rejected.
+        int vote;
+        if (frameState == FrameState::kFrameStateSpeech) {
+            vote = 1;
+        } else if (frameState == FrameState::kFrameStateSil) {
+            vote = 0;
+        } else {
+            return AudioChangeState::kChangeStateInvalid;
+        }
+        // Overwrite the oldest vote in the ring buffer and keep the running sum in sync.
+        win_sum += vote - win_state[cur_win_pos];
+        win_state[cur_win_pos] = vote;
+        cur_win_pos = (cur_win_pos + 1) % win_size_frame;
+        const bool was_sil = (pre_frame_state == FrameState::kFrameStateSil);
+        const bool was_speech = (pre_frame_state == FrameState::kFrameStateSpeech);
+        if (was_sil && win_sum >= sil_to_speech_frmcnt_thres) {
+            pre_frame_state = FrameState::kFrameStateSpeech;
+            return AudioChangeState::kChangeStateSil2Speech;
+        }
+        if (was_speech && win_sum <= speech_to_sil_frmcnt_thres) {
+            pre_frame_state = FrameState::kFrameStateSil;
+            return AudioChangeState::kChangeStateSpeech2Sil;
+        }
+        // No transition happened: report the steady state.
+        if (was_sil) return AudioChangeState::kChangeStateSil2Sil;
+        if (was_speech) return AudioChangeState::kChangeStateSpeech2Speech;
+        return AudioChangeState::kChangeStateInvalid;
+    }
+
+    int FrameSizeMs() {  // Frame size in milliseconds, as given to the constructor.
+        return frame_size_ms;
+    }
+};
+
+class E2EVadModel {
+public:
+    E2EVadModel() {
+        this->vad_opts = VADXOptions();
+        // windows_detector is set up by its default member initializer: WindowDetector(200, 150, 150, 10).
+        // All remaining state starts at the same values AllResetDetection() restores.
+        // init variables
+        this->is_final = false;
+        this->data_buf_start_frame = 0;
+        this->frm_cnt = 0;
+        this->latest_confirmed_speech_frame = 0;
+        this->lastest_confirmed_silence_frame = -1;
+        this->continous_silence_frame_count = 0;
+        this->vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        this->confirmed_start_frame = -1;
+        this->confirmed_end_frame = -1;
+        this->number_end_time_detected = 0;
+        this->sil_frame = 0;
+        this->sil_pdf_ids = this->vad_opts.sil_pdf_ids;
+        this->noise_average_decibel = -100.0;
+        this->pre_end_silence_detected = false;
+        this->next_seg = true;
+        // output_data_buf is a default-constructed (empty) vector.
+        this->output_data_buf_offset = 0;
+        // frame_probs is a default-constructed (empty) vector.
+        this->max_end_sil_frame_cnt_thresh =
+                this->vad_opts.max_end_silence_time - this->vad_opts.speech_to_sil_time_thres;
+        this->speech_noise_thres = this->vad_opts.speech_noise_thres;
+        this->max_time_out = false;
+        // decibel is a default-constructed (empty) vector.
+        this->ResetDetection();
+    }
+
+    std::vector<std::vector<int>>  // one {start_ms, end_ms} pair per reported segment
+    operator()(const std::vector<std::vector<float>> &score, const std::vector<float> &waveform, bool is_final = false,
+               bool online = false, int max_end_sil = 800, int max_single_segment_time = 15000,
+               float speech_noise_thres = 0.8, int sample_rate = 16000) {
+        max_end_sil_frame_cnt_thresh = max_end_sil - vad_opts.speech_to_sil_time_thres;
+        this->waveform = waveform;
+        this->vad_opts.max_single_segment_time = max_single_segment_time;
+        this->vad_opts.speech_noise_thres = speech_noise_thres;
+        this->vad_opts.sample_rate = sample_rate;
+        this->speech_noise_thres = speech_noise_thres;  // GetFrameState() reads this member, not vad_opts
+        ComputeDecibel();
+        ComputeScores(score);
+        if (!is_final) {
+            DetectCommonFrames();
+        } else {
+            DetectLastFrames();  // flags the newest frame as the final one
+        }
+        // Hand out every buffered segment from output_data_buf_offset on.
+        std::vector<std::vector<int>> segment_batch;
+        if (output_data_buf.size() > 0) {
+            for (size_t i = output_data_buf_offset; i < output_data_buf.size(); i++) {
+              int start_ms;
+              int end_ms;
+              if (online) {
+                // Online: a segment may come out in two parts, {start, -1} first and {-1, end} later.
+                if (!output_data_buf[i].contain_seg_start_point) {
+                  continue;
+                }
+                if (!next_seg && !output_data_buf[i].contain_seg_end_point) {
+                  continue;
+                }
+                start_ms = next_seg ? output_data_buf[i].start_ms : -1;
+                // next_seg is false while a start has been reported whose end is still pending.
+                if (output_data_buf[i].contain_seg_end_point) {
+                  end_ms = output_data_buf[i].end_ms;
+                  next_seg = true;
+                  output_data_buf_offset += 1;
+                } else {
+                  end_ms = -1;
+                  next_seg = false;
+                }
+              } else {
+                if (!is_final &&
+                    (!output_data_buf[i].contain_seg_start_point || !output_data_buf[i].contain_seg_end_point)) {
+                  continue;  // offline, not final: only complete segments are reported
+                }
+                start_ms = output_data_buf[i].start_ms;
+                end_ms = output_data_buf[i].end_ms;
+                output_data_buf_offset += 1;
+              }
+                std::vector<int> segment = {start_ms, end_ms};
+                segment_batch.push_back(segment);
+            }
+        }
+
+        if (is_final) {
+            AllResetDetection();  // the final chunk ends the stream: drop all accumulated state
+        }
+        return segment_batch;
+    }
+
+private:
+    VADXOptions vad_opts;
+    WindowDetector windows_detector = WindowDetector(200, 150, 150, 10);
+    bool is_final;
+    int data_buf_start_frame;
+    int frm_cnt;
+    int latest_confirmed_speech_frame;
+    int lastest_confirmed_silence_frame;
+    int continous_silence_frame_count;
+    VadStateMachine vad_state_machine;
+    int confirmed_start_frame;
+    int confirmed_end_frame;
+    int number_end_time_detected;
+    int sil_frame;
+    std::vector<int> sil_pdf_ids;
+    float noise_average_decibel;
+    bool pre_end_silence_detected;
+    bool next_seg;
+    std::vector<E2EVadSpeechBufWithDoa> output_data_buf;
+    int output_data_buf_offset;
+    std::vector<E2EVadFrameProb> frame_probs;
+    int max_end_sil_frame_cnt_thresh;
+    float speech_noise_thres;
+    std::vector<std::vector<float>> scores;
+    bool max_time_out;
+    std::vector<float> decibel;
+    std::vector<float> data_buf;
+    std::vector<float> data_buf_all;
+    std::vector<float> waveform;
+
+    void AllResetDetection() {  // Full reset: restores every member to its constructor value and empties all buffers.
+        is_final = false;
+        data_buf_start_frame = 0;
+        frm_cnt = 0;
+        latest_confirmed_speech_frame = 0;
+        lastest_confirmed_silence_frame = -1;
+        continous_silence_frame_count = 0;
+        vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        confirmed_start_frame = -1;
+        confirmed_end_frame = -1;
+        number_end_time_detected = 0;
+        sil_frame = 0;
+        sil_pdf_ids = vad_opts.sil_pdf_ids;
+        noise_average_decibel = -100.0;
+        pre_end_silence_detected = false;
+        next_seg = true;
+        output_data_buf.clear();
+        output_data_buf_offset = 0;
+        frame_probs.clear();
+        max_end_sil_frame_cnt_thresh = vad_opts.max_end_silence_time - vad_opts.speech_to_sil_time_thres;
+        speech_noise_thres = vad_opts.speech_noise_thres;  // vad_opts itself is not reset here
+        scores.clear();
+        max_time_out = false;
+        decibel.clear();
+        data_buf.clear();
+        data_buf_all.clear();
+        waveform.clear();
+        ResetDetection();  // also resets windows_detector
+    }
+
+    void ResetDetection() {  // Per-segment reset: rearm start/end detection while keeping the audio, score and output buffers.
+        continous_silence_frame_count = 0;
+        latest_confirmed_speech_frame = 0;
+        lastest_confirmed_silence_frame = -1;
+        confirmed_start_frame = -1;
+        confirmed_end_frame = -1;
+        vad_state_machine = VadStateMachine::kVadInStateStartPointNotDetected;
+        windows_detector.Reset();
+        sil_frame = 0;
+        frame_probs.clear();
+    }
+
+    void ComputeDecibel() {  // Append the current waveform to data_buf_all and one energy value (10*log10) per frame to decibel.
+        int frame_sample_length = int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000);
+        int frame_shift_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+        if (data_buf_all.empty()) {
+            data_buf_all = waveform;
+            data_buf = data_buf_all;
+        } else {
+            data_buf_all.insert(data_buf_all.end(), waveform.begin(), waveform.end());
+        }
+        const int num_samples = static_cast<int>(waveform.size());  // signed: size() - length + 1 wraps around for short chunks
+        for (int offset = 0; offset + frame_sample_length <= num_samples; offset += frame_shift_length) {
+            float sum = 0.0;
+            for (int i = 0; i < frame_sample_length; i++) {
+                sum += waveform[offset + i] * waveform[offset + i];
+            }
+            this->decibel.push_back(10 * log10(sum + 0.000001));  // small offset avoids log10(0)
+        }
+    }
+
+    void ComputeScores(const std::vector<std::vector<float>> &scores) {
+        // Remember how many frames this call delivered; the Detect*Frames()
+        // loops walk exactly that many trailing frames.
+        const auto block_frames = scores.size();
+        vad_opts.nn_eval_block_size = block_frames;
+        frm_cnt += block_frames;
+        // Appending to an empty history equals copying the first block.
+        this->scores.insert(this->scores.end(), scores.begin(), scores.end());
+    }
+
+    void PopDataBufTillFrame(int frame_idx) {  // Advance data_buf_start_frame towards frame_idx and re-slice data_buf from data_buf_all.
+      int frame_sample_length = int(vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);  // samples per frame shift
+      int start_pos=-1;  // stays -1 when no frame is consumed
+      int data_length= data_buf.size();
+      while (data_buf_start_frame < frame_idx) {
+        if (data_length >= frame_sample_length) {
+          data_buf_start_frame += 1;
+          start_pos= data_buf_start_frame* frame_sample_length;
+          data_length=data_buf_all.size()-start_pos;  // samples remaining after the new start frame
+        } else {
+          break;  // less than one frame shift of audio left
+        }
+      }
+      if (start_pos!=-1){
+        data_buf.resize(data_length);  // NOTE(review): data_length goes negative if start_pos exceeds data_buf_all.size() - confirm callers prevent this
+        std::copy(data_buf_all.begin() + start_pos, data_buf_all.end(), data_buf.begin());
+      }
+    }
+
+    void PopDataToOutputBuf(int start_frm, int frm_cnt, bool first_frm_is_start_point, bool last_frm_is_end_point,
+                            bool end_point_is_sent_end) {  // Extend (or open) the newest output segment by frm_cnt frames starting at start_frm.
+        PopDataBufTillFrame(start_frm);
+        int expected_sample_number = int(frm_cnt * vad_opts.sample_rate * vad_opts.frame_in_ms / 1000);  // note: the frm_cnt parameter shadows the member
+        if (last_frm_is_end_point) {
+            int extra_sample = std::max(0, int(vad_opts.frame_length_ms * vad_opts.sample_rate / 1000 -
+                                               vad_opts.sample_rate * vad_opts.frame_in_ms / 1000));  // frame length minus frame shift, in samples
+            expected_sample_number += int(extra_sample);
+        }
+        if (end_point_is_sent_end) {
+            expected_sample_number = std::max(expected_sample_number, int(data_buf.size()));
+        }
+        if (data_buf.size() < expected_sample_number) {
+            std::cout << "error in calling pop data_buf\n";
+        }
+        if (output_data_buf.size() == 0 || first_frm_is_start_point) {  // open a new, zero-length segment
+            output_data_buf.push_back(E2EVadSpeechBufWithDoa());
+            output_data_buf[output_data_buf.size() - 1].Reset();
+            output_data_buf[output_data_buf.size() - 1].start_ms = start_frm * vad_opts.frame_in_ms;
+            output_data_buf[output_data_buf.size() - 1].end_ms = output_data_buf[output_data_buf.size() - 1].start_ms;
+            output_data_buf[output_data_buf.size() - 1].doa = 0;
+        }
+        E2EVadSpeechBufWithDoa &cur_seg = output_data_buf.back();
+        if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {  // the new frames should start where the segment currently ends
+            std::cout << "warning\n";
+        }
+        int out_pos = (int) cur_seg.buffer.size();
+        int data_to_pop;
+        if (end_point_is_sent_end) {
+            data_to_pop = expected_sample_number;
+        } else {
+            data_to_pop = int(frm_cnt * vad_opts.frame_in_ms * vad_opts.sample_rate / 1000);
+        }
+        if (data_to_pop > int(data_buf.size())) {  // clamp both counts to the audio actually available
+            std::cout << "VAD data_to_pop is bigger than data_buf.size()!!!\n";
+            data_to_pop = (int) data_buf.size();
+            expected_sample_number = (int) data_buf.size();
+        }
+        cur_seg.doa = 0;
+        for (int sample_cpy_out = 0; sample_cpy_out < data_to_pop; sample_cpy_out++) {
+            cur_seg.buffer.push_back(data_buf.back());  // NOTE(review): pushes the last sample of data_buf each time, not consecutive samples - confirm intended
+            out_pos++;
+        }
+        for (int sample_cpy_out = data_to_pop; sample_cpy_out < expected_sample_number; sample_cpy_out++) {
+            cur_seg.buffer.push_back(data_buf.back());
+            out_pos++;
+        }
+        if (cur_seg.end_ms != start_frm * vad_opts.frame_in_ms) {
+            std::cout << "Something wrong with the VAD algorithm\n";
+        }
+        data_buf_start_frame += frm_cnt;
+        cur_seg.end_ms = (start_frm + frm_cnt) * vad_opts.frame_in_ms;
+        if (first_frm_is_start_point) {
+            cur_seg.contain_seg_start_point = true;
+        }
+        if (last_frm_is_end_point) {
+            cur_seg.contain_seg_end_point = true;
+        }
+    }
+
+    void OnSilenceDetected(int valid_frame) {  // Record valid_frame as the latest confirmed silence frame.
+        lastest_confirmed_silence_frame = valid_frame;
+        if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+            PopDataBufTillFrame(valid_frame);  // no segment open yet: audio before this frame is dropped from data_buf
+        }
+        // silence_detected_callback_
+        // pass
+    }
+
+    void OnVoiceDetected(int valid_frame) {  // Record valid_frame as confirmed speech and append it to the current output segment.
+        latest_confirmed_speech_frame = valid_frame;
+        PopDataToOutputBuf(valid_frame, 1, false, false, false);  // one frame, neither a start nor an end point
+    }
+
+    void OnVoiceStart(int start_frame, bool fake_result = false) {  // Record the segment start; unless fake_result, open a new output segment.
+        if (vad_opts.do_start_point_detection) {
+            // pass
+        }
+        if (confirmed_start_frame != -1) {  // -1 means "no start confirmed yet"
+            std::cout << "not reset vad properly\n";
+        } else {
+            confirmed_start_frame = start_frame;
+        }
+        if (!fake_result && vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+            PopDataToOutputBuf(confirmed_start_frame, 1, true, false, false);  // first_frm_is_start_point = true
+        }
+    }
+
+
+    void OnVoiceEnd(int end_frame, bool fake_result, bool is_last_frame) {  // Close the current segment at end_frame.
+        for (int t = latest_confirmed_speech_frame + 1; t < end_frame; t++) {  // first flush the frames up to the end point as speech
+            OnVoiceDetected(t);
+        }
+        if (vad_opts.do_end_point_detection) {
+            // pass
+        }
+        if (confirmed_end_frame != -1) {  // -1 means "no end confirmed yet"
+            std::cout << "not reset vad properly\n";
+        } else {
+            confirmed_end_frame = end_frame;
+        }
+        if (!fake_result) {
+            sil_frame = 0;
+            PopDataToOutputBuf(confirmed_end_frame, 1, false, true, is_last_frame);  // last_frm_is_end_point = true
+        }
+        number_end_time_detected++;
+    }
+
+    void MaybeOnVoiceEndIfLastFrame(bool is_final_frame, int cur_frm_idx) {  // On the final frame, force the open segment to end at cur_frm_idx.
+        if (is_final_frame) {
+            OnVoiceEnd(cur_frm_idx, false, true);  // real result, is_last_frame = true
+            vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+        }
+    }
+
+    int GetLatency() {  // LatencyFrmNumAtStartPoint() multiplied by vad_opts.frame_in_ms.
+        return int(LatencyFrmNumAtStartPoint() * vad_opts.frame_in_ms);
+    }
+
+    int LatencyFrmNumAtStartPoint() {  // Frames to look back from the detection frame when placing a segment start.
+        int vad_latency = windows_detector.GetWinSize();  // the sliding window length, in frames
+        if (vad_opts.do_extend) {
+            vad_latency += int(vad_opts.lookback_time_start_point / vad_opts.frame_in_ms);  // extra look-back, converted to frames
+        }
+        return vad_latency;
+    }
+
+    FrameState GetFrameState(int t) {  // Classify frame t as speech or silence from its energy (decibel[t]) and its scores (scores[t]).
+        FrameState frame_state = FrameState::kFrameStateInvalid;
+        float cur_decibel = decibel[t];
+        float cur_snr = cur_decibel - noise_average_decibel;  // level above the running noise estimate
+        if (cur_decibel < vad_opts.decibel_thres) {  // too quiet: silence, without consulting the scores
+            frame_state = FrameState::kFrameStateSil;
+            DetectOneFrame(frame_state, t, false);  // NOTE(review): DetectCommonFrames()/DetectLastFrames() call DetectOneFrame() again for this same frame - confirm intended
+            return frame_state;
+        }
+        float sum_score = 0.0;
+        float noise_prob = 0.0;
+        assert(sil_pdf_ids.size() == vad_opts.silence_pdf_num);
+        if (sil_pdf_ids.size() > 0) {
+            std::vector<float> sil_pdf_scores;
+            for (auto sil_pdf_id: sil_pdf_ids) {
+                sil_pdf_scores.push_back(scores[t][sil_pdf_id]);
+            }
+            sum_score = accumulate(sil_pdf_scores.begin(), sil_pdf_scores.end(), 0.0);  // summed score of the silence pdfs
+            noise_prob = log(sum_score) * vad_opts.speech_2_noise_ratio;
+            float total_score = 1.0;
+            sum_score = total_score - sum_score;  // from here on: 1 - silence score
+        }
+        float speech_prob = log(sum_score);
+        if (vad_opts.output_frame_probs) {
+            E2EVadFrameProb frame_prob;
+            frame_prob.noise_prob = noise_prob;
+            frame_prob.speech_prob = speech_prob;
+            frame_prob.score = sum_score;
+            frame_prob.frame_id = t;
+            frame_probs.push_back(frame_prob);
+        }
+        if (exp(speech_prob) >= exp(noise_prob) + speech_noise_thres) {
+            if (cur_snr >= vad_opts.snr_thres && cur_decibel >= vad_opts.decibel_thres) {  // the scores say speech; the energy must agree
+                frame_state = FrameState::kFrameStateSpeech;
+            } else {
+                frame_state = FrameState::kFrameStateSil;
+            }
+        } else {
+            frame_state = FrameState::kFrameStateSil;
+            if (noise_average_decibel < -99.9) {  // still at its initial -100.0: seed the estimate with this frame
+                noise_average_decibel = cur_decibel;
+            } else {
+                noise_average_decibel =
+                        (cur_decibel + noise_average_decibel * (vad_opts.noise_frame_num_used_for_snr - 1)) /
+                        vad_opts.noise_frame_num_used_for_snr;  // running average weighted by noise_frame_num_used_for_snr
+            }
+        }
+        return frame_state;
+    }
+
+    int DetectCommonFrames() {
+        // Push every frame of the newest score block, oldest first, through the state machine; always returns 0.
+        if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+            for (int remaining = vad_opts.nn_eval_block_size - 1; remaining >= 0; remaining--) {
+                const int frame_idx = frm_cnt - 1 - remaining;
+                const FrameState frame_state = GetFrameState(frame_idx);
+                DetectOneFrame(frame_state, frame_idx, false);
+            }
+        }
+        return 0;
+    }
+
+    int DetectLastFrames() {
+        // Same walk over the newest score block as DetectCommonFrames(), except
+        // that the very last frame is passed on with is_final_frame = true.
+        // Always returns 0.
+        if (vad_state_machine != VadStateMachine::kVadInStateEndPointDetected) {
+            for (int remaining = vad_opts.nn_eval_block_size - 1; remaining >= 0; remaining--) {
+                const int frame_idx = frm_cnt - 1 - remaining;
+                const FrameState frame_state = GetFrameState(frame_idx);
+                // remaining == 0 marks the newest frame of the block.
+                const bool is_last = (remaining == 0);
+                DetectOneFrame(frame_state, frame_idx, is_last);
+            }
+        }
+        return 0;
+    }
+
+    void DetectOneFrame(FrameState cur_frm_state, int cur_frm_idx, bool is_final_frame) {  // Feed one frame to the window detector and advance the VAD state machine.
+        FrameState tmp_cur_frm_state = FrameState::kFrameStateInvalid;
+        if (cur_frm_state == FrameState::kFrameStateSpeech) {
+            if (std::fabs(1.0) > vad_opts.fe_prior_thres) {  // compares the constant 1.0 against fe_prior_thres
+                tmp_cur_frm_state = FrameState::kFrameStateSpeech;
+            } else {
+                tmp_cur_frm_state = FrameState::kFrameStateSil;
+            }
+        } else if (cur_frm_state == FrameState::kFrameStateSil) {
+            tmp_cur_frm_state = FrameState::kFrameStateSil;
+        }
+        AudioChangeState state_change = windows_detector.DetectOneFrame(tmp_cur_frm_state, cur_frm_idx);
+        int frm_shift_in_ms = vad_opts.frame_in_ms;
+        if (AudioChangeState::kChangeStateSil2Speech == state_change) {
+            int silence_frame_count = continous_silence_frame_count;
+            continous_silence_frame_count = 0;
+            pre_end_silence_detected = false;
+            int start_frame = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                start_frame = std::max(data_buf_start_frame, cur_frm_idx - LatencyFrmNumAtStartPoint());  // back-date the start by the detection latency
+                OnVoiceStart(start_frame);
+                vad_state_machine = VadStateMachine::kVadInStateInSpeechSegment;
+                for (int t = start_frame + 1; t <= cur_frm_idx; t++) {
+                    OnVoiceDetected(t);
+                }
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                for (int t = latest_confirmed_speech_frame + 1; t < cur_frm_idx; t++) {
+                    OnVoiceDetected(t);
+                }
+                if (cur_frm_idx - confirmed_start_frame + 1 > vad_opts.max_single_segment_time / frm_shift_in_ms) {  // segment exceeds max_single_segment_time
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSpeech2Sil == state_change) {
+            continous_silence_frame_count = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                // do nothing
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                if (cur_frm_idx - confirmed_start_frame + 1 >
+                    vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSpeech2Speech == state_change) {
+            continous_silence_frame_count = 0;
+            if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                if (cur_frm_idx - confirmed_start_frame + 1 >
+                    vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    max_time_out = true;
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (!is_final_frame) {
+                    OnVoiceDetected(cur_frm_idx);
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        } else if (AudioChangeState::kChangeStateSil2Sil == state_change) {
+            continous_silence_frame_count += 1;
+            if (vad_state_machine == VadStateMachine::kVadInStateStartPointNotDetected) {
+                if ((vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadSingleUtteranceDetectMode) &&
+                     (continous_silence_frame_count * frm_shift_in_ms > vad_opts.max_start_silence_time)) ||
+                    (is_final_frame && number_end_time_detected == 0)) {  // start-silence timeout, or the input ended without any segment
+                    for (int t = lastest_confirmed_silence_frame + 1; t < cur_frm_idx; t++) {
+                        OnSilenceDetected(t);
+                    }
+                    OnVoiceStart(0, true);  // fake start/end pair: nothing is added to output_data_buf
+                    OnVoiceEnd(0, true, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else {
+                    if (cur_frm_idx >= LatencyFrmNumAtStartPoint()) {
+                        OnSilenceDetected(cur_frm_idx - LatencyFrmNumAtStartPoint());
+                    }
+                }
+            } else if (vad_state_machine == VadStateMachine::kVadInStateInSpeechSegment) {
+                if (continous_silence_frame_count * frm_shift_in_ms >= max_end_sil_frame_cnt_thresh) {  // trailing silence long enough to end the segment
+                    int lookback_frame = max_end_sil_frame_cnt_thresh / frm_shift_in_ms;
+                    if (vad_opts.do_extend) {
+                        lookback_frame -= vad_opts.lookahead_time_end_point / frm_shift_in_ms;
+                        lookback_frame -= 1;
+                        lookback_frame = std::max(0, lookback_frame);
+                    }
+                    OnVoiceEnd(cur_frm_idx - lookback_frame, false, false);  // end point is back-dated by lookback_frame
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (cur_frm_idx - confirmed_start_frame + 1 >
+                           vad_opts.max_single_segment_time / frm_shift_in_ms) {
+                    OnVoiceEnd(cur_frm_idx, false, false);
+                    vad_state_machine = VadStateMachine::kVadInStateEndPointDetected;
+                } else if (vad_opts.do_extend && !is_final_frame) {
+                    if (continous_silence_frame_count <= vad_opts.lookahead_time_end_point / frm_shift_in_ms) {
+                        OnVoiceDetected(cur_frm_idx);  // short silence inside the look-ahead still counts as part of the segment
+                    }
+                } else {
+                    MaybeOnVoiceEndIfLastFrame(is_final_frame, cur_frm_idx);
+                }
+            }
+        }
+        if (vad_state_machine == VadStateMachine::kVadInStateEndPointDetected &&
+            vad_opts.detect_mode == static_cast<int>(VadDetectMode::kVadMutipleUtteranceDetectMode)) {
+            ResetDetection();  // multi-utterance mode: rearm immediately for the next segment
+        }
+    }
+
+};
+
+
+
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.cpp b/funasr/runtime/onnxruntime/src/fsmn-vad.cpp
new file mode 100644
index 0000000..0f87cb2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.cpp
@@ -0,0 +1,273 @@
+#include <fstream>
+#include <iostream>
+#include "precomp.h"
+//#include "glog/logging.h"
+
+
+void FsmnVad::InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
+                       float vad_speech_noise_thres) {  // Configure the session, load model + CMVN, zero the caches, set fbank options.
+    session_options_.SetIntraOpNumThreads(1);
+    session_options_.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    session_options_.DisableCpuMemArena();
+    this->vad_sample_rate_ = vad_sample_rate;
+    this->vad_silence_duration_=vad_silence_duration;  // passed to E2EVadModel as max_end_sil by Infer()
+    this->vad_max_len_=vad_max_len;  // passed to E2EVadModel as max_single_segment_time by Infer()
+    this->vad_speech_noise_thres_=vad_speech_noise_thres;
+
+    ReadModel(vad_model);  // terminates the process if the model cannot be loaded
+    LoadCmvn(vad_cmvn.c_str());
+    InitCache();
+
+    fbank_opts.frame_opts.dither = 0;
+    fbank_opts.mel_opts.num_bins = 80;
+    fbank_opts.frame_opts.samp_freq = vad_sample_rate;
+    fbank_opts.frame_opts.window_type = "hamming";
+    fbank_opts.frame_opts.frame_shift_ms = 10;
+    fbank_opts.frame_opts.frame_length_ms = 25;
+    fbank_opts.energy_floor = 0;
+    fbank_opts.mel_opts.debug_mel = false;
+
+}
+
+void FsmnVad::ReadModel(const std::string &vad_model) {
+    // Create the ONNX Runtime session for vad_model and cache its input/output names.
+    try {
+        vad_session_ = std::make_shared<Ort::Session>(
+                env_, vad_model.c_str(), session_options_);
+    } catch (std::exception const &e) {
+        std::cerr << "Error when load onnx model: " << e.what() << std::endl;
+        exit(1);  // a failed load must not be reported as a successful exit
+    }
+    GetInputOutputInfo(vad_session_, &vad_in_names_, &vad_out_names_);
+}
+
+void FsmnVad::GetInputOutputInfo(
+        const std::shared_ptr<Ort::Session> &session,
+        std::vector<const char *> *in_names, std::vector<const char *> *out_names) {  // Fill in_names/out_names with the session's input and output names.
+    Ort::AllocatorWithDefaultOptions allocator;
+    // Input info
+    int num_nodes = session->GetInputCount();
+    in_names->resize(num_nodes);
+    for (int i = 0; i < num_nodes; ++i) {
+        std::unique_ptr<char, Ort::detail::AllocatedFree> name = session->GetInputNameAllocated(i, allocator);
+        Ort::TypeInfo type_info = session->GetInputTypeInfo(i);
+        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+        ONNXTensorElementDataType type = tensor_info.GetElementType();  // only used by the disabled log statement below
+        std::vector<int64_t> node_dims = tensor_info.GetShape();
+        std::stringstream shape;
+        for (auto j: node_dims) {
+            shape << j;
+            shape << " ";
+        }
+        // LOG(INFO) << "\tInput " << i << " : name=" << name.get() << " type=" << type
+        //           << " dims=" << shape.str();
+        (*in_names)[i] = name.get();
+        name.release();  // give up ownership so the stored pointer stays valid; the string is not freed in this function
+    }
+    // Output info
+    num_nodes = session->GetOutputCount();
+    out_names->resize(num_nodes);
+    for (int i = 0; i < num_nodes; ++i) {
+        std::unique_ptr<char, Ort::detail::AllocatedFree> name = session->GetOutputNameAllocated(i, allocator);
+        Ort::TypeInfo type_info = session->GetOutputTypeInfo(i);
+        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+        ONNXTensorElementDataType type = tensor_info.GetElementType();  // only used by the disabled log statement below
+        std::vector<int64_t> node_dims = tensor_info.GetShape();
+        std::stringstream shape;
+        for (auto j: node_dims) {
+            shape << j;
+            shape << " ";
+        }
+        // LOG(INFO) << "\tOutput " << i << " : name=" << name.get() << " type=" << type
+        //           << " dims=" << shape.str();
+        (*out_names)[i] = name.get();
+        name.release();  // same ownership hand-off as for the input names
+    }
+}
+
+
+// Run the VAD network on chunk_feats and store one probability row per output frame in *out_prob.
+// The cache tensors in in_cache_ are fed as extra inputs and refreshed from the outputs.
+void FsmnVad::Forward(
+        const std::vector<std::vector<float>> &chunk_feats,
+        std::vector<std::vector<float>> *out_prob) {
+    if (chunk_feats.empty()) return;  // chunk_feats[0] below would be out of range
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+    int num_frames = chunk_feats.size();
+    const int feature_dim = chunk_feats[0].size();
+    //  2. Generate input nodes tensor
+    // vad node { batch,frame number,feature dim }
+    const int64_t vad_feats_shape[3] = {1, num_frames, feature_dim};
+    std::vector<float> vad_feats;
+    for (const auto &chunk_feat: chunk_feats) {
+        vad_feats.insert(vad_feats.end(), chunk_feat.begin(), chunk_feat.end());
+    }
+    Ort::Value vad_feats_ort = Ort::Value::CreateTensor<float>(
+            memory_info, vad_feats.data(), vad_feats.size(), vad_feats_shape, 3);
+
+    // 3. Put nodes into onnx input vector
+    std::vector<Ort::Value> vad_inputs;
+    vad_inputs.emplace_back(std::move(vad_feats_ort));
+    // 4 caches
+    // cache node {batch,128,19,1}
+    const int64_t cache_feats_shape[4] = {1, 128, 19, 1};
+    for (size_t i = 0; i < in_cache_.size(); i++) {
+      vad_inputs.emplace_back(Ort::Value::CreateTensor<float>(
+              memory_info, in_cache_[i].data(), in_cache_[i].size(), cache_feats_shape, 4));
+    }
+
+    // 4. Onnx infer
+    std::vector<Ort::Value> vad_ort_outputs;
+    try {
+        vad_ort_outputs = vad_session_->Run(
+                Ort::RunOptions{nullptr}, vad_in_names_.data(), vad_inputs.data(),
+                vad_inputs.size(), vad_out_names_.data(), vad_out_names_.size());
+    } catch (std::exception const &e) {
+        std::cerr << "VAD onnx inference failed: " << e.what() << std::endl;
+        return;
+    }
+    // Output 0 holds the probabilities; outputs 1..N are the updated caches.
+    if (vad_ort_outputs.size() < in_cache_.size() + 1) return;
+
+    // 5. Change infer result to output shapes
+    float *logp_data = vad_ort_outputs[0].GetTensorMutableData<float>();
+    auto type_info = vad_ort_outputs[0].GetTensorTypeAndShapeInfo();
+    int num_outputs = type_info.GetShape()[1];
+    int output_dim = type_info.GetShape()[2];
+    out_prob->resize(num_outputs);
+    for (int i = 0; i < num_outputs; i++) {
+        (*out_prob)[i].resize(output_dim);
+        memcpy((*out_prob)[i].data(), logp_data + i * output_dim,
+               sizeof(float) * output_dim);
+    }
+
+    // Carry the cache outputs over to the next call.
+    for (size_t i = 0; i < in_cache_.size(); i++) {
+      float* data = vad_ort_outputs[i + 1].GetTensorMutableData<float>();
+      memcpy(in_cache_[i].data(), data, sizeof(float) * in_cache_[i].size());
+    }
+}
+
+void FsmnVad::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                         const std::vector<float> &waves) {
+    // Append one row of fbank_opts.mel_opts.num_bins values to vad_feats per frame computed from waves.
+    knf::OnlineFbank fbank(fbank_opts);
+    // data() is valid for an empty vector, whereas &waves[0] is undefined behaviour.
+    fbank.AcceptWaveform(sample_rate, waves.data(), waves.size());
+    int32_t frames = fbank.NumFramesReady();
+    for (int32_t i = 0; i != frames; ++i) {
+        const float *frame = fbank.GetFrame(i);
+        vad_feats.emplace_back(frame, frame + fbank_opts.mel_opts.num_bins);
+    }
+}
+
+// Parse the CMVN file: values on the <LearnRateCoef> line after <AddShift> go to means_list, those after <Rescale> to vars_list.
+void FsmnVad::LoadCmvn(const char *filename) {
+    using namespace std;
+    ifstream cmvn_stream(filename);
+    if (!cmvn_stream.is_open()) {
+        cerr << "Failed to open cmvn file: " << filename << endl;
+    }
+    string line;
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        if (line_item.empty()) continue;  // blank line: line_item[0] would be out of range
+        if (line_item[0] == "<AddShift>") {
+            getline(cmvn_stream, line);
+            istringstream means_lines_stream(line);
+            vector<string> means_lines{istream_iterator<string>{means_lines_stream}, istream_iterator<string>{}};
+            if (!means_lines.empty() && means_lines[0] == "<LearnRateCoef>") {
+                // The first three tokens and the last token are skipped; the rest are values.
+                for (size_t j = 3; j + 1 < means_lines.size(); j++) {
+                    means_list.push_back(stof(means_lines[j]));
+                }
+            }
+        } else if (line_item[0] == "<Rescale>") {
+            getline(cmvn_stream, line);
+            istringstream vars_lines_stream(line);
+            vector<string> vars_lines{istream_iterator<string>{vars_lines_stream}, istream_iterator<string>{}};
+            if (!vars_lines.empty() && vars_lines[0] == "<LearnRateCoef>") {
+                for (size_t j = 3; j + 1 < vars_lines.size(); j++) {
+                    vars_list.push_back(stof(vars_lines[j]));
+                }
+            }
+        }
+    }
+}
+
+// Stack lfr_m consecutive frames into one (hop lfr_n), apply CMVN, store the result in vad_feats and return it.
+std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n) {
+    if (vad_feats.empty()) return vad_feats;  // vad_feats[0] below would be out of range
+    std::vector<std::vector<float>> out_feats;
+    int T = vad_feats.size();
+    int T_lrf = ceil(1.0 * T / lfr_n);
+
+    // Pad frames at start(copy first frame)
+    for (int i = 0; i < (lfr_m - 1) / 2; i++) {
+        vad_feats.insert(vad_feats.begin(), vad_feats[0]);
+    }
+    // Merge lfr_m frames as one,lfr_n frames per window
+    T = T + (lfr_m - 1) / 2;
+    for (int i = 0; i < T_lrf; i++) {
+        std::vector<float> p;  // fresh per window, so consecutive padded tail windows cannot accumulate
+        if (lfr_m <= T - i * lfr_n) {
+            for (int j = 0; j < lfr_m; j++) {
+                p.insert(p.end(), vad_feats[i * lfr_n + j].begin(), vad_feats[i * lfr_n + j].end());
+            }
+            out_feats.emplace_back(std::move(p));
+        } else {
+            // Fill to lfr_m frames at last window if less than lfr_m frames  (copy last frame)
+            int num_padding = lfr_m - (T - i * lfr_n);
+            for (int j = 0; j < (vad_feats.size() - i * lfr_n); j++) {
+                p.insert(p.end(), vad_feats[i * lfr_n + j].begin(), vad_feats[i * lfr_n + j].end());
+            }
+            for (int j = 0; j < num_padding; j++) {
+                p.insert(p.end(), vad_feats[vad_feats.size() - 1].begin(), vad_feats[vad_feats.size() - 1].end());
+            }
+            out_feats.emplace_back(std::move(p));
+        }
+    }
+    // Apply cmvn
+    for (auto &out_feat: out_feats) {
+        for (size_t j = 0; j < means_list.size(); j++) {
+            out_feat[j] = (out_feat[j] + means_list[j]) * vars_list[j];
+        }
+    }
+    vad_feats = std::move(out_feats);
+    return vad_feats;
+}
+
+std::vector<std::vector<int>>
+FsmnVad::Infer(const std::vector<float> &waves) {
+    // Pipeline: fbank features -> LFR stacking + CMVN -> ONNX forward pass ->
+    // E2EVadModel post-processing, whose segment list is returned.
+    std::vector<std::vector<float>> features;
+    FbankKaldi(vad_sample_rate_, features, waves);
+    features = LfrCmvn(features, 5, 1);
+
+    std::vector<std::vector<float>> frame_probs;
+    Forward(features, &frame_probs);
+    // A fresh post-processor scores the whole waveform in one call (is_final = true, online = false).
+    E2EVadModel post_processor;
+    return post_processor(frame_probs, waves, true, false, vad_silence_duration_, vad_max_len_,
+                          vad_speech_noise_thres_, vad_sample_rate_);
+}
+
+void FsmnVad::InitCache(){
+  // Append four zero-filled cache buffers of 128 * 19 * 1 floats each.
+  const std::vector<float> zero_cache(128 * 19 * 1, 0);
+  in_cache_.insert(in_cache_.end(), 4, zero_cache);
+  // The buffers are appended, not replaced; Reset() clears in_cache_ first.
+};
+
+void FsmnVad::Reset(){  // Discard the cache contents carried over by Forward() and start again from zeros.
+  in_cache_.clear();
+  InitCache();
+};
+
+void FsmnVad::Test() {  // No-op: the body is empty.
+}
+
+FsmnVad::FsmnVad():env_(ORT_LOGGING_LEVEL_ERROR, ""),session_options_{} {  // Only builds the ORT environment; the model is loaded later by InitVad().
+}
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.h b/funasr/runtime/onnxruntime/src/fsmn-vad.h
new file mode 100644
index 0000000..e8569f9
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.h
@@ -0,0 +1,60 @@
+
+#ifndef VAD_SERVER_FSMNVAD_H
+#define VAD_SERVER_FSMNVAD_H
+
+#include "precomp.h"
+
+class FsmnVad {
+/**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * Deep-FSMN for Large Vocabulary Continuous Speech Recognition
+ * https://arxiv.org/abs/1803.05030
+*/
+// Voice activity detector: InitVad() configures the object, Infer() processes one waveform.
+public:
+    FsmnVad();
+    void Test();
+    void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
+                  float vad_speech_noise_thres);
+    // Runs fbank -> LFR/CMVN -> Forward -> scoring on `waves` and returns the scorer's segments.
+    std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
+    void Reset();  // clears in_cache_ and re-creates it through InitCache()
+
+private:
+    // NOTE(review): helpers below are defined in fsmn-vad.cpp; notes without a visible definition are hedged.
+    void ReadModel(const std::string &vad_model);
+
+    static void GetInputOutputInfo(
+            const std::shared_ptr<Ort::Session> &session,
+            std::vector<const char *> *in_names, std::vector<const char *> *out_names);
+
+    void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                    const std::vector<float> &waves);
+    // Replaces `vad_feats` with its spliced, CMVN-normalised form and returns a reference to that same argument.
+    std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n);
+
+    void Forward(
+            const std::vector<std::vector<float>> &chunk_feats,
+            std::vector<std::vector<float>> *out_prob);
+
+    void LoadCmvn(const char *filename);
+    void InitCache();  // appends four zero-filled buffers to in_cache_
+
+    std::shared_ptr<Ort::Session> vad_session_ = nullptr;
+    Ort::Env env_;
+    Ort::SessionOptions session_options_;
+    std::vector<const char *> vad_in_names_;
+    std::vector<const char *> vad_out_names_;
+    std::vector<std::vector<float>> in_cache_;
+    // Feature configuration and the CMVN statistics applied in LfrCmvn as (x + mean) * var.
+    knf::FbankOptions fbank_opts;
+    std::vector<float> means_list;
+    std::vector<float> vars_list;
+    int vad_sample_rate_ = 16000;
+    int vad_silence_duration_ = 800;    // presumably milliseconds - TODO confirm
+    int vad_max_len_ = 15000;           // presumably milliseconds - TODO confirm
+    double vad_speech_noise_thres_ = 0.9;
+};
+
+
+#endif //VAD_SERVER_FSMNVAD_H
diff --git a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
new file mode 100644
index 0000000..1d822a0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline-rtf.cpp
@@ -0,0 +1,140 @@
+
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+#include "libfunasrapi.h"
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+#include <atomic>
+#include <mutex>
+#include <thread>
+using namespace std;
+
+std::atomic<int> index(0);  // next wav_list position to hand out; shared by all runReg workers
+std::mutex mtx;             // guards the total_length / total_time accumulators updated in runReg
+
+// Benchmark worker: repeatedly claims the next wav index from the shared counter, recognises
+// that file, and finally merges this thread's audio length and decoding time (microseconds)
+// into *total_length (sum) and *total_time (maximum over threads). core_id is unused.
+void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, 
+            float* total_length, long* total_time, int core_id) {
+    struct timeval start, end;
+    float n_total_length = 0.0f;
+    long n_total_time = 0;
+
+    // Warm up on the first file. FunASRFreeResult accepts NULL, so the result is always released.
+    if (!wav_list.empty()) {
+        FunASRFreeResult(FunASRRecogFile(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL));
+    }
+
+    while (true) {
+        // Atomically fetch the next index and increment the shared counter.
+        int i = index.fetch_add(1);
+        if (i < 0 || static_cast<size_t>(i) >= wav_list.size()) {
+            break;
+        }
+        gettimeofday(&start, NULL);
+        FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL);
+        gettimeofday(&end, NULL);
+        long seconds = (end.tv_sec - start.tv_sec);
+        long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+        n_total_time += taking_micros;
+
+        if(result){
+            string msg = FunASRGetResult(result, 0);
+            // std::thread::id is not an int and must not be passed to %d; print its streamed form.
+            ostringstream tid;
+            tid << this_thread::get_id();
+            printf("Thread: %s Result: %s \n", tid.str().c_str(), msg.c_str());
+            n_total_length += FunASRGetRetSnippetTime(result);
+            FunASRFreeResult(result);
+        }else{
+            cout <<"No return data!";
+        }
+    }
+    {
+        // Publish under the lock: the accumulators are shared by every worker thread.
+        lock_guard<mutex> guard(mtx);
+        *total_length += n_total_length;
+        if(*total_time < n_total_time){
+            *total_time = n_total_time;
+        }
+    }
+}
+
+// Multi-threaded RTF benchmark. argv: model_dir, wav.scp, quantize (true|false), thread_num.
+// Loads the model once, recognises every listed wav on thread_num workers, prints timing/RTF.
+int main(int argc, char *argv[])
+{
+    if (argc < 5)
+    {
+        printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) thread_num \n", argv[0]);
+        exit(-1);
+    }
+    // read wav.scp: the second whitespace-separated column of each line is the wav path
+    vector<string> wav_list;
+    ifstream in(argv[2]);
+    if (!in.is_open()) {
+        printf("Failed to open file: %s", argv[2]);
+        exit(-1);  // fail with a non-zero status like the other error paths
+    }
+    string line;
+    while(getline(in, line))
+    {
+        istringstream iss(line);
+        string column1, column2;
+        // Blank or single-column lines carry no path; skip them instead of queueing "".
+        if (iss >> column1 >> column2) wav_list.push_back(column2);
+    }
+    in.close();
+    if (wav_list.empty()) {
+        printf("No wav entries found in: %s\n", argv[2]);
+        exit(-1);
+    }
+
+    // model init
+    struct timeval start, end;
+    gettimeofday(&start, NULL);
+    // is quantize
+    bool quantize = false;
+    istringstream(argv[3]) >> boolalpha >> quantize;
+    // thread num: atoi returns 0 for non-numeric text, so fall back to one worker
+    int thread_num = atoi(argv[4]);
+    if (thread_num < 1) thread_num = 1;
+    FUNASR_HANDLE asr_handle=FunASRInit(argv[1], 1, quantize);
+    if (!asr_handle)
+    {
+        printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
+        exit(-1);
+    }
+    gettimeofday(&end, NULL);
+    long seconds = (end.tv_sec - start.tv_sec);
+    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+    printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
+
+    // Multi-threaded test: all workers share the handle and the two accumulators below.
+    float total_length = 0.0f;
+    long total_time = 0;
+    std::vector<std::thread> threads;
+    for (int i = 0; i < thread_num; i++)
+    {
+        threads.emplace_back(thread(runReg, asr_handle, wav_list, &total_length, &total_time, i));
+    }
+    for (auto& thread : threads)
+    {
+        thread.join();
+    }
+
+    printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
+    printf("total_time_comput %ld ms.\n", total_time / 1000);
+    printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
+    printf("speedup %05lf .\n", 1.0/((double)total_time/ (total_length*1000000)));
+    FunASRUninit(asr_handle);
+    return 0;
+}
diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
similarity index 65%
rename from funasr/runtime/onnxruntime/tester/tester.cpp
rename to funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
index 7257603..b0d2e4d 100644
--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/src/funasr-onnx-offline.cpp
@@ -6,29 +6,29 @@
 #endif
 
 #include "libfunasrapi.h"
-
-#include <iostream>
-#include <fstream>
 #include <sstream>
 using namespace std;
 
 int main(int argc, char *argv[])
 {
-
-    if (argc < 4)
+    if (argc < 6)
     {
-        printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) \n", argv[0]);
+        printf("Usage: %s /path/to/model_dir /path/to/wav/file quantize(true or false) use_vad(true or false) use_punc(true or false)\n", argv[0]);
         exit(-1);
     }
     struct timeval start, end;
     gettimeofday(&start, NULL);
-    int nThreadNum = 4;
+    int thread_num = 1;
     // is quantize
     bool quantize = false;
+    bool use_vad = false;
+    bool use_punc = false;
     istringstream(argv[3]) >> boolalpha >> quantize;
-    FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
+    istringstream(argv[4]) >> boolalpha >> use_vad;
+    istringstream(argv[5]) >> boolalpha >> use_punc;
+    FUNASR_HANDLE asr_hanlde=FunASRInit(argv[1], thread_num, quantize, use_vad, use_punc);
 
-    if (!AsrHanlde)
+    if (!asr_hanlde)
     {
         printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
         exit(-1);
@@ -40,23 +40,21 @@
     printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
 
     gettimeofday(&start, NULL);
-    float snippet_time = 0.0f;
-
-    FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
-
+    FUNASR_RESULT result=FunASRRecogFile(asr_hanlde, argv[2], RASR_NONE, NULL, use_vad, use_punc);
     gettimeofday(&end, NULL);
-   
-    if (Result)
+
+    float snippet_time = 0.0f;
+    if (result)
     {
-        string msg = FunASRGetResult(Result, 0);
+        string msg = FunASRGetResult(result, 0);
         setbuf(stdout, NULL);
         printf("Result: %s \n", msg.c_str());
-        snippet_time = FunASRGetRetSnippetTime(Result);
-        FunASRFreeResult(Result);
+        snippet_time = FunASRGetRetSnippetTime(result);
+        FunASRFreeResult(result);
     }
     else
     {
-        cout <<"no return data!";
+        printf("no return data!\n");
     }
  
     printf("Audio length %lfs.\n", (double)snippet_time);
@@ -65,7 +63,7 @@
     printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
     printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
 
-    FunASRUninit(AsrHanlde);
+    FunASRUninit(asr_hanlde);
 
     return 0;
 }
diff --git a/funasr/runtime/onnxruntime/src/libfunasrapi.cpp b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
index a2ecf10..10c061e 100644
--- a/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
+++ b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
@@ -4,177 +4,197 @@
 extern "C" {
 #endif
 
-	// APIs for qmasr
-	_FUNASRAPI FUNASR_HANDLE  FunASRInit(const char* szModelDir, int nThreadNum, bool quantize)
+	// APIs for funasr
+	_FUNASRAPI FUNASR_HANDLE  FunASRInit(const char* sz_model_dir, int thread_num, bool quantize, bool use_vad, bool use_punc)
 	{
-		Model* mm = create_model(szModelDir, nThreadNum, quantize);
+		Model* mm = CreateModel(sz_model_dir, thread_num, quantize, use_vad, use_punc);
 		return mm;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+	_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
 	{
-		Model* pRecogObj = (Model*)handle;
-		if (!pRecogObj)
+		Model* recog_obj = (Model*)handle;
+		if (!recog_obj)
 			return nullptr;
 
 		int32_t sampling_rate = -1;
 		Audio audio(1);
-		if (!audio.loadwav(szBuf, nLen, &sampling_rate))
+		if (!audio.LoadWav(sz_buf, n_len, &sampling_rate))
 			return nullptr;
-		//audio.split();
+		if(use_vad){
+			audio.Split(recog_obj);
+		}
 
 		float* buff;
 		int len;
 		int flag=0;
-		FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
-		pResult->snippet_time = audio.get_time_len();
-		int nStep = 0;
-		int nTotal = audio.get_queue_size();
-		while (audio.fetch(buff, len, flag) > 0) {
-			//pRecogObj->reset();
-			string msg = pRecogObj->forward(buff, len, flag);
-			pResult->msg += msg;
-			nStep++;
-			if (fnCallback)
-				fnCallback(nStep, nTotal);
+		FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		int n_step = 0;
+		int n_total = audio.GetQueueSize();
+		while (audio.Fetch(buff, len, flag) > 0) {
+			string msg = recog_obj->Forward(buff, len, flag);
+			p_result->msg += msg;
+			n_step++;
+			if (fn_callback)
+				fn_callback(n_step, n_total);
+		}
+		if(use_punc){
+			string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+			p_result->msg = punc_res;
 		}
 
-		return pResult;
+		return p_result;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
 	{
-		Model* pRecogObj = (Model*)handle;
-		if (!pRecogObj)
+		Model* recog_obj = (Model*)handle;
+		if (!recog_obj)
 			return nullptr;
 
 		Audio audio(1);
-		if (!audio.loadpcmwav(szBuf, nLen, &sampling_rate))
+		if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
 			return nullptr;
-		//audio.split();
+		if(use_vad){
+			audio.Split(recog_obj);
+		}
 
 		float* buff;
 		int len;
 		int flag = 0;
-		FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
-		pResult->snippet_time = audio.get_time_len();
-		int nStep = 0;
-		int nTotal = audio.get_queue_size();
-		while (audio.fetch(buff, len, flag) > 0) {
-			//pRecogObj->reset();
-			string msg = pRecogObj->forward(buff, len, flag);
-			pResult->msg += msg;
-			nStep++;
-			if (fnCallback)
-				fnCallback(nStep, nTotal);
+		FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		int n_step = 0;
+		int n_total = audio.GetQueueSize();
+		while (audio.Fetch(buff, len, flag) > 0) {
+			string msg = recog_obj->Forward(buff, len, flag);
+			p_result->msg += msg;
+			n_step++;
+			if (fn_callback)
+				fn_callback(n_step, n_total);
+		}
+		if(use_punc){
+			string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+			p_result->msg = punc_res;
 		}
 
-		return pResult;
+		return p_result;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
 	{
-		Model* pRecogObj = (Model*)handle;
-		if (!pRecogObj)
+		Model* recog_obj = (Model*)handle;
+		if (!recog_obj)
 			return nullptr;
 
 		Audio audio(1);
-		if (!audio.loadpcmwav(szFileName, &sampling_rate))
+		if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
 			return nullptr;
-		//audio.split();
+		if(use_vad){
+			audio.Split(recog_obj);
+		}
 
 		float* buff;
 		int len;
 		int flag = 0;
-		FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
-		pResult->snippet_time = audio.get_time_len();
-		int nStep = 0;
-		int nTotal = audio.get_queue_size();
-		while (audio.fetch(buff, len, flag) > 0) {
-			//pRecogObj->reset();
-			string msg = pRecogObj->forward(buff, len, flag);
-			pResult->msg += msg;
-			nStep++;
-			if (fnCallback)
-				fnCallback(nStep, nTotal);
+		FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		int n_step = 0;
+		int n_total = audio.GetQueueSize();
+		while (audio.Fetch(buff, len, flag) > 0) {
+			string msg = recog_obj->Forward(buff, len, flag);
+			p_result->msg += msg;
+			n_step++;
+			if (fn_callback)
+				fn_callback(n_step, n_total);
+		}
+		if(use_punc){
+			string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+			p_result->msg = punc_res;
 		}
 
-		return pResult;
+		return p_result;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
+	_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool use_vad, bool use_punc)
 	{
-		Model* pRecogObj = (Model*)handle;
-		if (!pRecogObj)
+		Model* recog_obj = (Model*)handle;
+		if (!recog_obj)
 			return nullptr;
 		
 		int32_t sampling_rate = -1;
 		Audio audio(1);
-		if(!audio.loadwav(szWavfile, &sampling_rate))
+		if(!audio.LoadWav(sz_wavfile, &sampling_rate))
 			return nullptr;
-		//audio.split();
+		if(use_vad){
+			audio.Split(recog_obj);
+		}
 
 		float* buff;
 		int len;
 		int flag = 0;
-		int nStep = 0;
-		int nTotal = audio.get_queue_size();
-		FUNASR_RECOG_RESULT* pResult = new FUNASR_RECOG_RESULT;
-		pResult->snippet_time = audio.get_time_len();
-		while (audio.fetch(buff, len, flag) > 0) {
-			//pRecogObj->reset();
-			string msg = pRecogObj->forward(buff, len, flag);
-			pResult->msg+= msg;
-			nStep++;
-			if (fnCallback)
-				fnCallback(nStep, nTotal);
+		int n_step = 0;
+		int n_total = audio.GetQueueSize();
+		FUNASR_RECOG_RESULT* p_result = new FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		while (audio.Fetch(buff, len, flag) > 0) {
+			string msg = recog_obj->Forward(buff, len, flag);
+			p_result->msg+= msg;
+			n_step++;
+			if (fn_callback)
+				fn_callback(n_step, n_total);
+		}
+		if(use_punc){
+			string punc_res = recog_obj->AddPunc((p_result->msg).c_str());
+			p_result->msg = punc_res;
 		}
 	
-		return pResult;
+		return p_result;
 	}
 
-	_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT Result)
+	_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
 	{
-		if (!Result)
+		if (!result)
 			return 0;
 
 		return 1;
 	}
 
 
-	_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT Result)
+	_FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result)
 	{
-		if (!Result)
+		if (!result)
 			return 0.0f;
 
-		return ((FUNASR_RECOG_RESULT*)Result)->snippet_time;
+		return ((FUNASR_RECOG_RESULT*)result)->snippet_time;
 	}
 
-	_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT Result,int nIndex)
+	_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index)
 	{
-		FUNASR_RECOG_RESULT * pResult = (FUNASR_RECOG_RESULT*)Result;
-		if(!pResult)
+		FUNASR_RECOG_RESULT * p_result = (FUNASR_RECOG_RESULT*)result;
+		if(!p_result)
 			return nullptr;
 
-		return pResult->msg.c_str();
+		return p_result->msg.c_str();
 	}
 
-	_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT Result)
+	_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result)
 	{
-		if (Result)
+		if (result)
 		{
-			delete (FUNASR_RECOG_RESULT*)Result;
+			delete (FUNASR_RECOG_RESULT*)result;
 		}
 	}
 
 	_FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle)
 	{
-		Model* pRecogObj = (Model*)handle;
+		Model* recog_obj = (Model*)handle;
 
-		if (!pRecogObj)
+		if (!recog_obj)
 			return;
 
-		delete pRecogObj;
+		delete recog_obj;
 	}
 
 #ifdef __cplusplus 
diff --git a/funasr/runtime/onnxruntime/src/model.cpp b/funasr/runtime/onnxruntime/src/model.cpp
new file mode 100644
index 0000000..a582f82
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/model.cpp
@@ -0,0 +1,8 @@
+#include "precomp.h"
+
+// Factory for the recogniser: builds a paraformer::Paraformer from the model directory `path`
+// and hands the heap object to the caller (FunASRUninit deletes the handle it was given).
+Model *CreateModel(const char *path, int thread_num, bool quantize, bool use_vad, bool use_punc)
+{
+    return new paraformer::Paraformer(path, thread_num, quantize, use_vad, use_punc);
+}
diff --git a/funasr/runtime/onnxruntime/src/online-feature.cpp b/funasr/runtime/onnxruntime/src/online-feature.cpp
new file mode 100644
index 0000000..36e2770
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/online-feature.cpp
@@ -0,0 +1,129 @@
+
+#include "online-feature.h"
+#include <utility>
+
+// Stores the extractor configuration and converts the 25 ms window / 10 ms shift to samples.
+OnlineFeature::OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m, int lfr_n,
+                             std::vector<std::vector<float>> cmvns)
+  : fbank_opts_(std::move(fbank_opts)),  // initialisers listed in member declaration order
+    cmvns_(std::move(cmvns)),
+    sample_rate_(sample_rate), lfr_m_(lfr_m), lfr_n_(lfr_n) {
+  const int samples_per_ms = sample_rate_ / 1000;  // integer division, e.g. 16 at 16 kHz
+  frame_sample_length_ = samples_per_ms * 25;
+  frame_shift_sample_length_ = samples_per_ms * 10;
+}
+
+void OnlineFeature::ExtractFeats(vector<std::vector<float>> &vad_feats,
+                                 vector<float> waves, bool input_finished) {  // waves is a by-value copy; edits to it stay local
+  input_finished_ = input_finished;
+  OnlineFbank(vad_feats, waves);  // appends the fbank frames computed from cached + new samples
+  // Streaming LFR/CMVN step; reserve_waveforms_ and lfr_splice_cache_ carry state between calls.
+  if (vad_feats.size() > 0) {
+    if (!reserve_waveforms_.empty()) {
+      waves.insert(waves.begin(), reserve_waveforms_.begin(), reserve_waveforms_.end());
+    }
+    if (lfr_splice_cache_.empty()) {  // empty cache: seed it with (lfr_m_ - 1) / 2 copies of the first frame
+      for (int i = 0; i < (lfr_m_ - 1) / 2; i++) {
+        lfr_splice_cache_.emplace_back(vad_feats[0]);
+      }
+    }
+    if (vad_feats.size() + lfr_splice_cache_.size() >= lfr_m_) {  // enough frames for at least one LFR window
+      vad_feats.insert(vad_feats.begin(), lfr_splice_cache_.begin(), lfr_splice_cache_.end());
+      int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
+      int minus_frame = reserve_waveforms_.empty() ? (lfr_m_ - 1) / 2 : 0;
+      int lfr_splice_frame_idxs = OnlineLfrCmvn(vad_feats);  // vad_feats now holds the LFR + CMVN output
+      int reserve_frame_idx = lfr_splice_frame_idxs - minus_frame;
+      reserve_waveforms_.clear();
+      reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
+                                waves.begin() + frame_from_waves * frame_shift_sample_length_);
+      int sample_length = (frame_from_waves - 1) * frame_shift_sample_length_ + frame_sample_length_;
+      waves.erase(waves.begin() + sample_length, waves.end());  // trims the local copy only
+    } else {  // too few frames for a window: keep samples and frames for the next call
+      reserve_waveforms_.clear();
+      reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                waves.begin() + frame_sample_length_ - frame_shift_sample_length_, waves.end());
+      lfr_splice_cache_.insert(lfr_splice_cache_.end(), vad_feats.begin(), vad_feats.end());
+    }
+    // (end of the branch taken when this call produced at least one fbank frame)
+  } else {
+    if (input_finished_) {  // no new frames and the stream ended: flush the cached frames
+      if (!reserve_waveforms_.empty()) {
+        waves = reserve_waveforms_;
+      }
+      vad_feats = lfr_splice_cache_;
+      OnlineLfrCmvn(vad_feats);
+      ResetCache();
+    }
+  }
+
+}
+
+// Splices every lfr_m_ consecutive frames of `vad_feats` (stepping by lfr_n_) into one LFR
+// frame, applies CMVN, and replaces `vad_feats` with the result. The input frames from the
+// returned index onwards are stored in lfr_splice_cache_ for the next call.
+// When input_finished_ is set, windows that run past the last frame are padded by repeating
+// the last frame; otherwise splicing stops at the first incomplete window.
+// Every output frame is built in its own buffer, so padded frames never carry earlier data.
+int OnlineFeature::OnlineLfrCmvn(vector<vector<float>> &vad_feats) {
+  vector<vector<float>> out_feats;
+  int T = vad_feats.size();
+  // True division before ceil(); integer division would already have rounded down.
+  int T_lrf = ceil(static_cast<double>(T - (lfr_m_ - 1) / 2) / lfr_n_);
+  int lfr_splice_frame_idxs = T_lrf;
+  for (int i = 0; i < T_lrf; i++) {
+    const int first = i * lfr_n_;
+    if (lfr_m_ > T - first && !input_finished_) {
+      // Not enough frames yet and more input is expected: stop and keep the tail cached.
+      lfr_splice_frame_idxs = i;
+      break;
+    }
+    // Fresh buffer per window.
+    vector<float> p;
+    for (int j = 0; j < lfr_m_; j++) {
+      // Past the end of the input (final windows only) the last frame is repeated as padding.
+      const vector<float> &frame = vad_feats[std::min(first + j, T - 1)];
+      p.insert(p.end(), frame.begin(), frame.end());
+    }
+    out_feats.emplace_back(std::move(p));
+  }
+  // Clamp to [0, T - 1] so an empty or very short input still yields a valid iterator offset.
+  lfr_splice_frame_idxs = std::max(0, std::min(T - 1, lfr_splice_frame_idxs * lfr_n_));
+  lfr_splice_cache_.clear();
+  lfr_splice_cache_.insert(lfr_splice_cache_.begin(), vad_feats.begin() + lfr_splice_frame_idxs, vad_feats.end());
+
+  // Apply cmvn: cmvns_[0] holds the additive terms, cmvns_[1] the multiplicative terms.
+  for (auto &out_feat: out_feats) {
+    for (int j = 0; j < cmvns_[0].size(); j++) {
+      out_feat[j] = (out_feat[j] + cmvns_[0][j]) * cmvns_[1][j];
+    }
+  }
+  vad_feats = out_feats;
+  return lfr_splice_frame_idxs;
+}
+
+// Prepends the cached samples to `waves`, computes fbank features for every complete frame and
+// appends one num_bins-sized vector per frame to `vad_feats`; leftover samples are cached.
+void OnlineFeature::OnlineFbank(vector<std::vector<float>> &vad_feats,
+                                vector<float> &waves) {
+  knf::OnlineFbank fbank(fbank_opts_);
+  // cache merge
+  waves.insert(waves.begin(), input_cache_.begin(), input_cache_.end());
+  const int n_frames = ComputeFrameNum(waves.size(), frame_sample_length_, frame_shift_sample_length_);
+  // Keep the audio after the last frame-shift position for the next call.
+  input_cache_.assign(waves.begin() + n_frames * frame_shift_sample_length_, waves.end());
+  if (n_frames == 0) {
+    return;
+  }
+  // Drop the trailing samples that no complete frame covers.
+  const int used_samples = (n_frames - 1) * frame_shift_sample_length_ + frame_sample_length_;
+  waves.erase(waves.begin() + used_samples, waves.end());
+
+  fbank.AcceptWaveform(sample_rate_, waves.data(), waves.size());
+  const int32_t ready = fbank.NumFramesReady();
+  const int32_t dim = fbank_opts_.mel_opts.num_bins;
+  for (int32_t k = 0; k != ready; ++k) {
+    const float *row = fbank.GetFrame(k);
+    vad_feats.emplace_back(row, row + dim);
+  }
+}
diff --git a/funasr/runtime/onnxruntime/src/online-feature.h b/funasr/runtime/onnxruntime/src/online-feature.h
new file mode 100644
index 0000000..78245de
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/online-feature.h
@@ -0,0 +1,51 @@
+
+#include <vector>
+#include "precomp.h"
+
+using namespace std;
+
+class OnlineFeature {
+// Streaming feature extractor: fbank, then LFR splicing and CMVN, with caches kept between calls.
+public:
+  OnlineFeature(int sample_rate, knf::FbankOptions fbank_opts, int lfr_m_, int lfr_n_,
+                std::vector<std::vector<float>> cmvns_);
+  // Processes one chunk; `waves` is taken by value, `vad_feats` receives the resulting frames.
+  void ExtractFeats(vector<vector<float>> &vad_feats, vector<float> waves, bool input_finished);
+
+private:
+  void OnlineFbank(vector<vector<float>> &vad_feats, vector<float> &waves);
+  int OnlineLfrCmvn(vector<vector<float>> &vad_feats);
+  // Number of complete frames in sample_length samples; 0 when not even one frame fits.
+  static int ComputeFrameNum(int sample_length, int frame_sample_length, int frame_shift_sample_length) {
+    int frame_num = static_cast<int>((sample_length - frame_sample_length) / frame_shift_sample_length + 1);
+    if (frame_num >= 1 && sample_length >= frame_sample_length)
+      return frame_num;
+    else
+      return 0;
+  }
+  // Clears all three caches and the end-of-input flag.
+  void ResetCache() {
+    reserve_waveforms_.clear();
+    input_cache_.clear();
+    lfr_splice_cache_.clear();
+    input_finished_ = false;
+
+  }
+
+  knf::FbankOptions fbank_opts_;
+  // The reserved waveforms by fbank
+  std::vector<float> reserve_waveforms_;
+  // waveforms reserved after last shift position
+  std::vector<float> input_cache_;
+  // lfr reserved cache
+  std::vector<std::vector<float>> lfr_splice_cache_;
+  std::vector<std::vector<float>> cmvns_;
+  // cmvns_[0] is added and cmvns_[1] is multiplied in OnlineLfrCmvn.
+  int sample_rate_ = 16000;
+  int frame_sample_length_ = sample_rate_ / 1000 * 25;;
+  int frame_shift_sample_length_ = sample_rate_ / 1000 * 10;
+  int lfr_m_;
+  int lfr_n_;
+  bool input_finished_ = false;
+
+};
diff --git a/funasr/runtime/onnxruntime/src/paraformer.cpp b/funasr/runtime/onnxruntime/src/paraformer.cpp
new file mode 100644
index 0000000..72127f8
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/paraformer.cpp
@@ -0,0 +1,262 @@
+#include "precomp.h"
+
+using namespace std;
+using namespace paraformer;
+
+// Builds the recogniser from the model directory `path`: optionally loads the VAD and
+// punctuation models, configures fbank extraction, creates the ONNX session for model.onnx
+// (model_quant.onnx when `quantize`), caches its I/O names, builds Vocab and loads am.mvn.
+Paraformer::Paraformer(const char* path,int thread_num, bool quantize, bool use_vad, bool use_punc)
+:env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),session_options{}{
+    // VAD model
+    if(use_vad){
+        string vad_path = PathAppend(path, "vad_model.onnx");
+        string mvn_path = PathAppend(path, "vad.mvn");
+        vad_handle = make_unique<FsmnVad>();
+        // InitVad is declared as (model, cmvn, sample_rate, silence_duration, max_len, threshold).
+        vad_handle->InitVad(vad_path, mvn_path, MODEL_SAMPLE_RATE, VAD_SILENCE_DYRATION, VAD_MAX_LEN, VAD_SPEECH_NOISE_THRES);
+    }
+
+    // PUNC model
+    if(use_punc){
+        punc_handle = make_unique<CTTransformer>(path, thread_num);
+    }
+
+    // Acoustic model, CMVN statistics and the config used by Vocab all live in `path`.
+    string model_path = PathAppend(path, quantize ? "model_quant.onnx" : "model.onnx");
+    string cmvn_path = PathAppend(path, "am.mvn");
+    string config_path = PathAppend(path, "config.yaml");
+
+    // knf options
+    fbank_opts.frame_opts.dither = 0;
+    fbank_opts.mel_opts.num_bins = 80;
+    fbank_opts.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
+    fbank_opts.frame_opts.window_type = "hamming";
+    fbank_opts.frame_opts.frame_shift_ms = 10;
+    fbank_opts.frame_opts.frame_length_ms = 25;
+    fbank_opts.energy_floor = 0;
+    fbank_opts.mel_opts.debug_mel = false;
+
+    // Only the intra-op thread count is set; the inter-op setting keeps its default.
+    session_options.SetIntraOpNumThreads(thread_num);
+    session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
+    // DisableCpuMemArena can improve performance
+    session_options.DisableCpuMemArena();
+
+#ifdef _WIN32
+    // On Windows ONNX Runtime takes the model path as a wide string.
+    wstring wstrPath = strToWstr(model_path);
+    m_session = std::make_unique<Ort::Session>(env_, wstrPath.c_str(), session_options);
+#else
+    m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), session_options);
+#endif
+
+    // Query the two input and two output names; the std::string copies own the characters.
+    string strName;
+    GetInputName(m_session.get(), strName);
+    m_strInputNames.push_back(strName.c_str());
+    GetInputName(m_session.get(), strName,1);
+    m_strInputNames.push_back(strName);
+    
+    GetOutputName(m_session.get(), strName);
+    m_strOutputNames.push_back(strName);
+    GetOutputName(m_session.get(), strName,1);
+    m_strOutputNames.push_back(strName);
+
+    // The const char* views passed to Session::Run point into the strings stored above, so
+    // they are collected only after both string vectors have stopped growing.
+    for (auto& item : m_strInputNames)
+        m_szInputNames.push_back(item.c_str());
+    for (auto& item : m_strOutputNames)
+        m_szOutputNames.push_back(item.c_str());
+
+    vocab = new Vocab(config_path.c_str());
+    LoadCmvn(cmvn_path.c_str());
+}
+
+// Releases the vocabulary allocated in the constructor.
+Paraformer::~Paraformer()
+{
+    delete vocab;  // deleting a null pointer is a no-op, so no explicit check is needed
+}
+
+void Paraformer::Reset()
+{  // Empty body: this implementation has nothing to reset.
+}
+
+vector<std::vector<int>> Paraformer::VadSeg(std::vector<float>& pcm_data){  // forwards pcm_data to FsmnVad::Infer
+    return vad_handle->Infer(pcm_data);  // NOTE(review): vad_handle is only created when use_vad was true at construction
+}
+
+string Paraformer::AddPunc(const char* sz_input){  // adds punctuation; the text is returned unchanged when no punc model was loaded
+    return punc_handle ? punc_handle->AddPunc(sz_input) : string(sz_input);
+}
+
+// Computes fbank features for `len` samples and returns them frame after frame in one flat
+// vector holding num_bins floats per ready frame.
+vector<float> Paraformer::FbankKaldi(float sample_rate, const float* waves, int len) {
+    knf::OnlineFbank extractor(fbank_opts);
+    extractor.AcceptWaveform(sample_rate, waves, len);
+    // InputFinished() is not called here; only the frames reported as ready are copied.
+    const int32_t n_frames = extractor.NumFramesReady();
+    const int32_t dim = fbank_opts.mel_opts.num_bins;
+
+    vector<float> features(n_frames * dim);
+    for (int32_t t = 0; t != n_frames; ++t) {
+        const float *row = extractor.GetFrame(t);
+        // Frame t occupies [t * dim, (t + 1) * dim) in the output.
+        std::copy(row, row + dim, features.begin() + t * dim);
+    }
+    return features;
+}
+
+// Parses the CMVN file: the line following "<AddShift>" fills means_list and the line following
+// "<Rescale>" fills vars_list (values multiplied by `scale`). On such a line, which must start
+// with "<LearnRateCoef>", the first three tokens and the last token are skipped.
+void Paraformer::LoadCmvn(const char *filename)
+{
+    ifstream cmvn_stream(filename);
+    string line;
+
+    while (getline(cmvn_stream, line)) {
+        istringstream iss(line);
+        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
+        // A blank line yields no tokens; reading line_item[0] would be undefined behaviour.
+        if (line_item.empty()) {
+            continue;
+        }
+        const bool is_shift = (line_item[0] == "<AddShift>");
+        if (!is_shift && line_item[0] != "<Rescale>") {
+            continue;
+        }
+        getline(cmvn_stream, line);
+        istringstream values_stream(line);
+        vector<string> values{istream_iterator<string>{values_stream}, istream_iterator<string>{}};
+        if (values.empty() || values[0] != "<LearnRateCoef>") {
+            continue;
+        }
+        // The bound is written without a subtraction so the unsigned size can never wrap.
+        for (size_t j = 3; j + 1 < values.size(); j++) {
+            if (is_shift) means_list.push_back(stof(values[j]));
+            else vars_list.push_back(stof(values[j]) * scale);
+        }
+    }
+}
+
+// For each of the n_len frames, asks FindMax for the best of token_nums scores and collects
+// its index; the index sequence is converted to text by the vocabulary.
+string Paraformer::GreedySearch(float * in, int n_len,  int64_t token_nums)
+{
+    vector<int> best_ids;
+    for (int t = 0; t < n_len; ++t) {
+        float best_score;
+        int best_id;
+        FindMax(in + t * token_nums, token_nums, best_score, best_id);  // frame t's score row
+        best_ids.push_back(best_id);
+    }
+    return vocab->Vector2StringV2(best_ids);
+}
+
+// LFR stacking: each output frame is lfr_window_size consecutive input frames laid end to
+// end, and successive output frames start lfr_window_shift input frames apart.
+// Returns an empty vector when `in` holds fewer than lfr_window_size frames.
+vector<float> Paraformer::ApplyLfr(const std::vector<float> &in) 
+{
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    int32_t in_num_frames = in.size() / in_feat_dim;
+    // Integer division truncates towards zero, so without this guard a short input could
+    // still produce one output frame whose copy reads past the end of `in`.
+    if (in_num_frames < lfr_window_size) return {};
+    int32_t out_num_frames = (in_num_frames - lfr_window_size) / lfr_window_shift + 1;
+    int32_t out_feat_dim = in_feat_dim * lfr_window_size;
+    std::vector<float> out(out_num_frames * out_feat_dim);
+    const float *p_in = in.data();
+    float *p_out = out.data();
+    for (int32_t i = 0; i != out_num_frames; ++i) {
+      std::copy(p_in, p_in + out_feat_dim, p_out);
+      p_out += out_feat_dim;
+      p_in += lfr_window_shift * in_feat_dim;
+    }
+    return out;
+  }
+
+  // Normalises `v` in place, frame by frame: x[k] = (x[k] + means_list[k]) * vars_list[k].
+  // A frame is means_list.size() values long. Nothing is done when no statistics are loaded.
+  void Paraformer::ApplyCmvn(std::vector<float> *v)
+  {
+    int32_t dim = means_list.size();
+    // dim == 0 would divide by zero below; fewer scales than shifts would index past vars_list.
+    if (dim == 0 || vars_list.size() < means_list.size()) return;
+    int32_t num_frames = v->size() / dim;
+    float *p = v->data();
+    for (int32_t i = 0; i != num_frames; ++i, p += dim) {
+      for (int32_t k = 0; k != dim; ++k) {
+        p[k] = (p[k] + means_list[k]) * vars_list[k];
+      }
+    }
+  }
+
+// Recognises `len` samples at `din`: fbank -> LFR -> CMVN -> ONNX run -> greedy decoding.
+// `flag` is unused. Returns "" when no stacked frame is available or inference throws.
+string Paraformer::Forward(float* din, int len, int flag)
+{
+    int32_t in_feat_dim = fbank_opts.mel_opts.num_bins;
+    std::vector<float> wav_feats = FbankKaldi(MODEL_SAMPLE_RATE, din, len);
+    wav_feats = ApplyLfr(wav_feats);
+    ApplyCmvn(&wav_feats);
+
+    int32_t feat_dim = lfr_window_size*in_feat_dim;
+    int32_t num_frames = wav_feats.size() / feat_dim;
+    string result;
+    // Too little audio for a single stacked frame: there is nothing to feed the model.
+    if (num_frames <= 0) return result;
+
+#ifdef _WIN_X86
+        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
+#else
+        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+#endif
+
+    const int64_t input_shape_[3] = {1, num_frames, feat_dim};
+    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
+        wav_feats.data(),
+        wav_feats.size(),
+        input_shape_,
+        3);
+
+    const int64_t paraformer_length_shape[1] = {1};
+    std::vector<int32_t> paraformer_length;
+    paraformer_length.emplace_back(num_frames);
+    Ort::Value onnx_feats_len = Ort::Value::CreateTensor<int32_t>(
+          m_memoryInfo, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1);
+
+    std::vector<Ort::Value> input_onnx;
+    input_onnx.emplace_back(std::move(onnx_feats));
+    input_onnx.emplace_back(std::move(onnx_feats_len));
+    try {
+        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
+        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
+        float* floatData = outputTensor[0].GetTensorMutableData<float>();
+        auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
+        result = GreedySearch(floatData, *encoder_out_lens, outputShape[2]);
+    }
+    catch (std::exception const &e)
+    {
+        // Print through "%s": the message is data and must never be used as the format string.
+        printf("%s", e.what());
+    }
+    return result;
+}
+
+string Paraformer::ForwardChunk(float* din, int len, int flag)
+{
+    // Not implemented: ignores its arguments, prints a notice and returns a placeholder string.
+    printf("Not Imp!!!!!!\n");
+    return "Hello";
+}
+
+string Paraformer::Rescoring()
+{  // Not implemented: prints a notice and returns a placeholder string.
+    printf("Not Imp!!!!!!\n");
+    return "Hello";
+}
diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
new file mode 100644
index 0000000..5301932
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -0,0 +1,58 @@
+#pragma once
+
+
+#ifndef PARAFORMER_MODELIMP_H
+#define PARAFORMER_MODELIMP_H
+
+#include "precomp.h"
+
+namespace paraformer {
+
+    class Paraformer : public Model {
+    /**
+     * Author: Speech Lab of DAMO Academy, Alibaba Group
+     * Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
+     * https://arxiv.org/pdf/2206.08317.pdf
+    */
+    private:
+        //std::unique_ptr<knf::OnlineFbank> fbank_;
+        knf::FbankOptions fbank_opts;
+        // NOTE(review): presumably only populated when use_vad / use_punc are requested -- confirm in the constructor.
+        std::unique_ptr<FsmnVad> vad_handle;
+        std::unique_ptr<CTTransformer> punc_handle;
+
+        Vocab* vocab; // raw pointer; NOTE(review): ownership is not visible in this header -- confirm the destructor frees it
+        vector<float> means_list;
+        vector<float> vars_list;
+        const float scale = 22.6274169979695; // numerically sqrt(512)
+        int32_t lfr_window_size = 7;
+        int32_t lfr_window_shift = 6;
+        // NOTE(review): presumably CMVN loading, low-frame-rate stacking and CMVN application -- confirm in paraformer.cpp.
+        void LoadCmvn(const char *filename);
+        vector<float> ApplyLfr(const vector<float> &in);
+        void ApplyCmvn(vector<float> *v);
+
+        string GreedySearch( float* in, int n_len, int64_t token_nums); // Forward() passes the output data, the encoder length and the last output dimension
+        // Members are destroyed in reverse declaration order, i.e. env_ before m_session. NOTE(review): confirm ONNX Runtime tolerates that.
+        std::shared_ptr<Ort::Session> m_session;
+        Ort::Env env_;
+        Ort::SessionOptions session_options;
+        // The m_sz* vectors are the C-string arrays handed to m_session->Run(); NOTE(review): presumably they point into the m_str* strings -- confirm.
+        vector<string> m_strInputNames, m_strOutputNames;
+        vector<const char*> m_szInputNames;
+        vector<const char*> m_szOutputNames;
+
+    public:
+        Paraformer(const char* path, int thread_num=0, bool quantize=false, bool use_vad=false, bool use_punc=false);
+        ~Paraformer();
+        void Reset();
+        vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
+        string ForwardChunk(float* din, int len, int flag); // placeholder in paraformer.cpp: prints a notice and returns a fixed string
+        string Forward(float* din, int len, int flag);      // runs the ONNX session and returns the GreedySearch() result
+        string Rescoring();                                 // placeholder in paraformer.cpp: prints a notice and returns a fixed string
+        std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data);
+        string AddPunc(const char* sz_input);
+    };
+
+} // namespace paraformer
+#endif
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
deleted file mode 100644
index 695e0f7..0000000
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
-#include "precomp.h"
-
-using namespace std;
-using namespace paraformer;
-
-ModelImp::ModelImp(const char* path,int nNumThread, bool quantize)
-:env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),sessionOptions{}{
-    string model_path;
-    string cmvn_path;
-    string config_path;
-
-    if(quantize)
-    {
-        model_path = pathAppend(path, "model_quant.onnx");
-    }else{
-        model_path = pathAppend(path, "model.onnx");
-    }
-    cmvn_path = pathAppend(path, "am.mvn");
-    config_path = pathAppend(path, "config.yaml");
-
-    fft_input = (float *)fftwf_malloc(sizeof(float) * fft_size);
-    fft_out = (fftwf_complex *)fftwf_malloc(sizeof(fftwf_complex) * fft_size);
-    memset(fft_input, 0, sizeof(float) * fft_size);
-    plan = fftwf_plan_dft_r2c_1d(fft_size, fft_input, fft_out, FFTW_ESTIMATE);
-
-    //sessionOptions.SetInterOpNumThreads(1);
-    sessionOptions.SetIntraOpNumThreads(nNumThread);
-    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
-
-#ifdef _WIN32
-    wstring wstrPath = strToWstr(model_path);
-    m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions);
-#else
-    m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions);
-#endif
-
-    string strName;
-    getInputName(m_session.get(), strName);
-    m_strInputNames.push_back(strName.c_str());
-    getInputName(m_session.get(), strName,1);
-    m_strInputNames.push_back(strName);
-    
-    getOutputName(m_session.get(), strName);
-    m_strOutputNames.push_back(strName);
-    getOutputName(m_session.get(), strName,1);
-    m_strOutputNames.push_back(strName);
-
-    for (auto& item : m_strInputNames)
-        m_szInputNames.push_back(item.c_str());
-    for (auto& item : m_strOutputNames)
-        m_szOutputNames.push_back(item.c_str());
-    vocab = new Vocab(config_path.c_str());
-    load_cmvn(cmvn_path.c_str());
-}
-
-ModelImp::~ModelImp()
-{
-    if(vocab)
-        delete vocab;
-    fftwf_free(fft_input);
-    fftwf_free(fft_out);
-    fftwf_destroy_plan(plan);
-    fftwf_cleanup();
-}
-
-void ModelImp::reset()
-{
-}
-
-void ModelImp::apply_lfr(Tensor<float>*& din)
-{
-    int mm = din->size[2];
-    int ll = ceil(mm / 6.0);
-    Tensor<float>* tmp = new Tensor<float>(ll, 560);
-    int out_offset = 0;
-    for (int i = 0; i < ll; i++) {
-        for (int j = 0; j < 7; j++) {
-            int idx = i * 6 + j - 3;
-            if (idx < 0) {
-                idx = 0;
-            }
-            if (idx >= mm) {
-                idx = mm - 1;
-            }
-            memcpy(tmp->buff + out_offset, din->buff + idx * 80,
-                sizeof(float) * 80);
-            out_offset += 80;
-        }
-    }
-    delete din;
-    din = tmp;
-}
-
-void ModelImp::load_cmvn(const char *filename)
-{
-    ifstream cmvn_stream(filename);
-    string line;
-
-    while (getline(cmvn_stream, line)) {
-        istringstream iss(line);
-        vector<string> line_item{istream_iterator<string>{iss}, istream_iterator<string>{}};
-        if (line_item[0] == "<AddShift>") {
-            getline(cmvn_stream, line);
-            istringstream means_lines_stream(line);
-            vector<string> means_lines{istream_iterator<string>{means_lines_stream}, istream_iterator<string>{}};
-            if (means_lines[0] == "<LearnRateCoef>") {
-                for (int j = 3; j < means_lines.size() - 1; j++) {
-                    means_list.push_back(stof(means_lines[j]));
-                }
-                continue;
-            }
-        }
-        else if (line_item[0] == "<Rescale>") {
-            getline(cmvn_stream, line);
-            istringstream vars_lines_stream(line);
-            vector<string> vars_lines{istream_iterator<string>{vars_lines_stream}, istream_iterator<string>{}};
-            if (vars_lines[0] == "<LearnRateCoef>") {
-                for (int j = 3; j < vars_lines.size() - 1; j++) {
-                    vars_list.push_back(stof(vars_lines[j])*scale);
-                }
-                continue;
-            }
-        }
-    }
-}
-
-void ModelImp::apply_cmvn(Tensor<float>* din)
-{
-    const float* var;
-    const float* mean;
-    var = vars_list.data();
-    mean= means_list.data();
-
-    int m = din->size[2];
-    int n = din->size[3];
-
-    for (int i = 0; i < m; i++) {
-        for (int j = 0; j < n; j++) {
-            int idx = i * n + j;
-            din->buff[idx] = (din->buff[idx] + mean[j]) * var[j];
-        }
-    }
-}
-
-string ModelImp::greedy_search(float * in, int nLen )
-{
-    vector<int> hyps;
-    int Tmax = nLen;
-    for (int i = 0; i < Tmax; i++) {
-        int max_idx;
-        float max_val;
-        findmax(in + i * 8404, 8404, max_val, max_idx);
-        hyps.push_back(max_idx);
-    }
-
-    return vocab->vector2stringV2(hyps);
-}
-
-string ModelImp::forward(float* din, int len, int flag)
-{
-    Tensor<float>* in;
-    FeatureExtract* fe = new FeatureExtract(3);
-    fe->reset();
-    fe->insert(plan, din, len, flag);
-    fe->fetch(in);
-    apply_lfr(in);
-    apply_cmvn(in);
-    Ort::RunOptions run_option;
-
-#ifdef _WIN_X86
-        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
-#else
-        Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
-#endif
-
-    std::array<int64_t, 3> input_shape_{ in->size[0],in->size[2],in->size[3] };
-    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
-        in->buff,
-        in->buff_size,
-        input_shape_.data(),
-        input_shape_.size());
-
-    std::vector<int32_t> feats_len{ in->size[2] };
-    std::vector<int64_t> feats_len_dim{ 1 };
-    Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
-        m_memoryInfo,
-        feats_len.data(),
-        feats_len.size() * sizeof(int32_t),
-        feats_len_dim.data(),
-        feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
-    std::vector<Ort::Value> input_onnx;
-    input_onnx.emplace_back(std::move(onnx_feats));
-    input_onnx.emplace_back(std::move(onnx_feats_len));
-
-    string result;
-    try {
-
-        auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
-        std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
-
-        int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
-        float* floatData = outputTensor[0].GetTensorMutableData<float>();
-        auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
-        result = greedy_search(floatData, *encoder_out_lens);
-    }
-    catch (...)
-    {
-        result = "";
-    }
-
-    if(in){
-        delete in;
-        in = nullptr;
-    }
-    if(fe){
-        delete fe;
-        fe = nullptr;
-    }
-
-    return result;
-}
-
-string ModelImp::forward_chunk(float* din, int len, int flag)
-{
-
-    printf("Not Imp!!!!!!\n");
-    return "Hello";
-}
-
-string ModelImp::rescoring()
-{
-    printf("Not Imp!!!!!!\n");
-    return "Hello";
-}
diff --git a/funasr/runtime/onnxruntime/src/paraformer_onnx.h b/funasr/runtime/onnxruntime/src/paraformer_onnx.h
deleted file mode 100644
index 8946ae1..0000000
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#pragma once
-
-
-#ifndef PARAFORMER_MODELIMP_H
-#define PARAFORMER_MODELIMP_H
-
-namespace paraformer {
-
-    class ModelImp : public Model {
-    private:
-        int fft_size=512;
-        float *fft_input;
-        fftwf_complex *fft_out;
-        fftwf_plan plan;
-
-        Vocab* vocab;
-        vector<float> means_list;
-        vector<float> vars_list;
-        const float scale = 22.6274169979695;
-
-        void apply_lfr(Tensor<float>*& din);
-        void apply_cmvn(Tensor<float>* din);
-        void load_cmvn(const char *filename);
-
-        string greedy_search( float* in, int nLen);
-
-        std::unique_ptr<Ort::Session> m_session;
-        Ort::Env env_;
-        Ort::SessionOptions sessionOptions;
-
-        vector<string> m_strInputNames, m_strOutputNames;
-        vector<const char*> m_szInputNames;
-        vector<const char*> m_szOutputNames;
-
-    public:
-        ModelImp(const char* path, int nNumThread=0, bool quantize=false);
-        ~ModelImp();
-        void reset();
-        string forward_chunk(float* din, int len, int flag);
-        string forward(float* din, int len, int flag);
-        string rescoring();
-
-    };
-
-} // namespace paraformer
-#endif
diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h
index 3aeed14..d567f15 100644
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -1,6 +1,5 @@
 #pragma once 
 // system 
-
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -16,38 +15,31 @@
 #include <string>
 #include <math.h>
 #include <numeric>
-
-
 #include <cstring>
 
 using namespace std;
 // third part
-
-#include <fftw3.h>
 #include "onnxruntime_run_options_config_keys.h"
 #include "onnxruntime_cxx_api.h"
-
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
 
 // mine
-
+#include "common-struct.h"
+#include "com-define.h"
 #include "commonfunc.h"
-#include <ComDefine.h>
-#include "predefine_coe.h"
-
-#include <ComDefine.h>
-//#include "alignedmem.h"
-#include "Vocab.h"
-#include "Tensor.h"
+#include "predefine-coe.h"
+#include "tokenizer.h"
+#include "ct-transformer.h"
+#include "fsmn-vad.h"
+#include "e2e-vad.h"
+#include "vocab.h"
+#include "audio.h"
+#include "tensor.h"
 #include "util.h"
-#include "CommonStruct.h"
-#include "FeatureExtract.h"
-#include "FeatureQueue.h"
-#include "SpeechWrap.h"
-#include <Audio.h>
 #include "resample.h"
-#include "Model.h"
-#include "paraformer_onnx.h"
+#include "model.h"
+#include "paraformer.h"
 #include "libfunasrapi.h"
-
 
 using namespace paraformer;
diff --git a/funasr/runtime/onnxruntime/src/predefine_coe.h b/funasr/runtime/onnxruntime/src/predefine-coe.h
similarity index 100%
rename from funasr/runtime/onnxruntime/src/predefine_coe.h
rename to funasr/runtime/onnxruntime/src/predefine-coe.h
diff --git a/funasr/runtime/onnxruntime/src/Tensor.h b/funasr/runtime/onnxruntime/src/tensor.h
similarity index 98%
rename from funasr/runtime/onnxruntime/src/Tensor.h
rename to funasr/runtime/onnxruntime/src/tensor.h
index 68ac9aa..3b7a633 100644
--- a/funasr/runtime/onnxruntime/src/Tensor.h
+++ b/funasr/runtime/onnxruntime/src/tensor.h
@@ -71,7 +71,7 @@
 {
     buff_size = size[0] * size[1] * size[2] * size[3];
     mem_size = buff_size;
-    buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
+    buff = (T *)AlignedMalloc(32, buff_size * sizeof(T));
 }
 
 template <typename T> void Tensor<T>::free_buff()
diff --git a/funasr/runtime/onnxruntime/src/tmp.h b/funasr/runtime/onnxruntime/src/tmp.h
deleted file mode 100644
index b57303f..0000000
--- a/funasr/runtime/onnxruntime/src/tmp.h
+++ /dev/null
@@ -1,112 +0,0 @@
-
-#ifndef WENETPARAMS_H
-#define WENETPARAMS_H
-// #pragma pack(1)
-
-#define vocab_size 5538
-
-typedef struct {
-    float conv0_weight[512 * 9];
-    float conv0_bias[512];
-
-    float conv1_weight[512 * 512 * 9];
-    float conv1_bias[512];
-
-    float out0_weight[9728 * 512];
-    float out0_bias[512];
-
-} EncEmbedParams;
-
-typedef struct {
-    float linear_q_weight[512 * 512];
-    float linear_q_bias[512];
-    float linear_k_weight[512 * 512];
-    float linear_k_bias[512];
-    float linear_v_weight[512 * 512];
-    float linear_v_bias[512];
-    float linear_out_weight[512 * 512];
-    float linear_out_bias[512];
-} SelfAttnParams;
-
-typedef struct {
-    SelfAttnParams linear0;
-    float linear_pos_weight[512 * 512];
-    float pos_bias_u[512];
-    float pos_bias_v[512];
-
-} EncSelfAttnParams;
-
-typedef struct {
-    float w1_weight[512 * 2048];
-    float w1_bias[2048];
-    float w2_weight[2048 * 512];
-    float w2_bias[512];
-} FeedForwardParams;
-
-typedef struct {
-    float weight[512];
-    float bias[512];
-} NormParams;
-
-typedef struct {
-    float pointwise_conv1_weight[1024 * 512];
-    float pointwise_conv1_bias[1024];
-
-    float depthwise_conv_weight[512 * 15];
-    float depthwise_conv_bias[512];
-
-    float pointwise_conv2_weight[512 * 512];
-    float pointwise_conv2_bias[512];
-    NormParams norm;
-} EncConvParams;
-
-typedef struct {
-    EncSelfAttnParams self_attn;
-    FeedForwardParams feedforward;
-    FeedForwardParams feedforward_macaron;
-    EncConvParams conv_module;
-    NormParams norm_ff;
-    NormParams norm_mha;
-    NormParams norm_macaron;
-    NormParams norm_conv;
-    NormParams norm_final;
-    // float concat_weight[1024 * 512];
-    // float concat_bias[512];
-} SubEncoderParams;
-
-typedef struct {
-    EncEmbedParams embed;
-    SubEncoderParams sub_encoder[12];
-    NormParams after_norm;
-} EncoderParams;
-
-typedef struct {
-    SelfAttnParams self_attn;
-    SelfAttnParams src_attn;
-    FeedForwardParams feedward;
-    NormParams norm1;
-    NormParams norm2;
-    NormParams norm3;
-    // float concat_weight1[1024 * 512];
-    // float concat_bias1[512];
-    // float concat_weight2[1024 * 512];
-    // float concat_bias2[512];
-} SubDecoderParams;
-
-typedef struct {
-    float embed_weight[vocab_size * 512];
-    SubDecoderParams sub_decoder[6];
-    NormParams after_norm;
-    float output_weight[vocab_size * 512];
-    float output_bias[vocab_size];
-} DecoderParams;
-
-typedef struct {
-    EncoderParams encoder;
-    float ctc_weight[512 * vocab_size];
-    float ctc_bias[vocab_size];
-    DecoderParams decoder;
-} WenetParams;
-
-// #pragma pack()
-#endif
diff --git a/funasr/runtime/onnxruntime/src/tokenizer.cpp b/funasr/runtime/onnxruntime/src/tokenizer.cpp
new file mode 100644
index 0000000..ef0c533
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/tokenizer.cpp
@@ -0,0 +1,208 @@
+ #include "precomp.h"
+
+// Creates a tokenizer and immediately loads its tables from `sz_yamlfile`;
+// the outcome of OpenYaml() is only reflected in m_ready.
+CTokenizer::CTokenizer(const char* sz_yamlfile) : m_ready(false)
+{ OpenYaml(sz_yamlfile); }
+
+// Creates an empty tokenizer; m_ready stays false until OpenYaml() succeeds.
+CTokenizer::CTokenizer() : m_ready(false)
+{}
+
+void CTokenizer::ReadYaml(const YAML::Node& node) 
+{
+	if (node.IsMap()) 
+	{// map node: recurse into every value
+		for (auto it = node.begin(); it != node.end(); ++it) 
+		{
+			ReadYaml(it->second);
+		}
+	}
+	if (node.IsSequence()) {// sequence node: recurse into every element
+		for (size_t i = 0; i < node.size(); ++i) {
+			ReadYaml(node[i]);
+		}
+	}
+	if (node.IsScalar()) {// scalar node: print its string value to stdout
+		cout << node.as<string>() << endl;
+	}
+}
+
+// Loads "token_list" and "punc_list" from the YAML file into the lookup tables.
+// Returns false when the document is null or yaml-cpp throws; true (m_ready set) otherwise.
+bool CTokenizer::OpenYaml(const char* sz_yamlfile)
+{
+	try
+	{
+		// LoadFile sits inside the try block: it is the call that throws on an unreadable file.
+		YAML::Node m_Config = YAML::LoadFile(sz_yamlfile);
+		if (m_Config.IsNull()) return false;
+		auto Tokens = m_Config["token_list"];
+		if (Tokens.IsSequence())
+		{
+			for (size_t i = 0; i < Tokens.size(); ++i)
+			{
+				if (!Tokens[i].IsScalar())
+					continue; // non-scalar entries are skipped
+				m_id2token.push_back(Tokens[i].as<string>());
+				m_token2id.insert(make_pair<string, int>(Tokens[i].as<string>(), i));
+			}
+		}
+		auto Puncs = m_Config["punc_list"];
+		if (Puncs.IsSequence())
+		{
+			for (size_t i = 0; i < Puncs.size(); ++i)
+			{
+				if (!Puncs[i].IsScalar())
+					continue; // non-scalar entries are skipped
+				m_id2punc.push_back(Puncs[i].as<string>());
+				m_punc2id.insert(make_pair<string, int>(Puncs[i].as<string>(), i));
+			}
+		}
+	}
+	catch (const YAML::Exception& e) { // base class: covers BadFile, parser and conversion errors
+		std::cout << "read error!" << std::endl;
+		return  false;
+	}
+	m_ready = true;
+	return m_ready;
+}
+
+// Maps each token id in `input` to its token string via m_id2token.
+vector<string> CTokenizer::Id2String(vector<int> input)
+{
+	vector<string> tokens;
+	// NOTE(review): ids are used as indices without a range check.
+	for (size_t k = 0; k < input.size(); ++k)
+		tokens.push_back(m_id2token[input[k]]);
+	return tokens;
+}
+
+// Returns the id of `input`, or the id of UNK_CHAR when `input` is unknown.
+int CTokenizer::String2Id(string input)
+{
+	auto hit = m_token2id.find(input);	// one lookup instead of find + operator[]
+	if (hit == m_token2id.end())
+		hit = m_token2id.find(UNK_CHAR);	// find() never inserts, unlike operator[]
+	// 0 is the value operator[] used to yield when UNK_CHAR itself was absent.
+	return hit != m_token2id.end() ? hit->second : 0;
+}
+
+// Lower-cases each token and maps it to its id (UNK_CHAR's id when unknown).
+vector<int> CTokenizer::String2Ids(vector<string> input)
+{
+	vector<int> result;
+	for (auto& item : input)
+	{
+		// tolower() is undefined for negative chars (UTF-8 bytes >= 0x80 with a
+		// signed char), so each byte is converted to unsigned char first.
+		transform(item.begin(), item.end(), item.begin(), [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+		result.push_back(String2Id(item));	// same lookup rule as String2Id
+	}
+	return result;
+}
+
+// Maps punctuation ids to their strings via m_id2punc (ids are not range-checked).
+vector<string> CTokenizer::Id2Punc(vector<int> input)
+{
+	vector<string> puncs;
+	for (size_t k = 0; k < input.size(); ++k)
+		puncs.push_back(m_id2punc[input[k]]);
+	return puncs;
+}
+
+string CTokenizer::Id2Punc(int n_punc_id)
+{
+	const string& punc = m_id2punc[n_punc_id];	// single-id overload, no range check
+	return punc;
+}
+
+vector<int> CTokenizer::Punc2Ids(vector<string> input)
+{
+	vector<int> result;	// one id per input string; unknown strings map to 0
+	for (auto& item : input) {
+		auto hit = m_punc2id.find(item);	// find(), not operator[]: a miss must not insert into m_punc2id
+		result.push_back(hit != m_punc2id.end() ? hit->second : 0);
+	}
+	return result;
+}
+
+// Splits `str_info` into UTF-8 characters, sized by the leading 1 bits of each first byte.
+vector<string> CTokenizer::SplitChineseString(const string & str_info)
+{
+	vector<string> pieces;
+	const int total = str_info.size();
+	for (int pos = 0; pos < total; )
+	{
+		int ones = 0;	// leading 1 bits of the first byte, capped at 6
+		while (ones < 6 && (str_info[pos] & (0x80 >> ones)))
+			++ones;
+		const int width = ones > 0 ? ones : 1;	// a byte with no leading 1 bit still takes 1 byte
+		pieces.push_back(str_info.substr(pos, width));
+		pos += width;
+	}
+	return pieces;
+}
+
+// Appends the `split`-separated fields of `str` to `res`, keeping empty fields;
+// an empty `str` appends nothing.
+void CTokenizer::StrSplit(const string& str, const char split, vector<string>& res)
+{
+	if (str.empty())
+		return;
+	// Walk `str` in place; the old code re-copied the remaining tail for every field.
+	size_t start = 0;
+	for (size_t pos = str.find(split); pos != string::npos; pos = str.find(split, start))
+	{
+		res.emplace_back(str, start, pos - start);
+		start = pos + 1;
+	}
+	// Last field (the one the old code closed by appending a separator).
+	res.emplace_back(str, start, string::npos);
+}
+
+ void CTokenizer::Tokenize(const char* str_info, vector<string> & str_out, vector<int> & id_out)
+{
+	vector<string>  strList;
+	StrSplit(str_info,' ', strList); // split the input on spaces
+	string current_eng,current_chinese;
+	for (auto& item : strList)
+	{
+		current_eng = "";
+		current_chinese = "";
+		for (auto& ch : item)
+		{
+			if (!(ch& 0x80))
+			{ // ASCII byte (high bit clear): part of an English run
+				if (current_chinese.size() > 0)
+				{
+					// for utf-8 chinese
+					auto chineseList = SplitChineseString(current_chinese);
+					str_out.insert(str_out.end(), chineseList.begin(),chineseList.end());
+					current_chinese = "";
+				}
+				current_eng += ch;
+			}
+			else
+			{ // high bit set: byte of a multi-byte character; flush any pending English run first
+				if (current_eng.size() > 0)
+				{
+					str_out.push_back(current_eng);
+					current_eng = "";
+				}
+				current_chinese += ch;
+			}
+		}
+		if (current_chinese.size() > 0)
+		{
+			auto chineseList = SplitChineseString(current_chinese);
+			str_out.insert(str_out.end(), chineseList.begin(), chineseList.end());
+			current_chinese = "";
+		}
+		if (current_eng.size() > 0)
+		{
+			str_out.push_back(current_eng);
+		}
+	}
+	id_out= String2Ids(str_out); // ids are computed for every entry currently in str_out
+}
diff --git a/funasr/runtime/onnxruntime/src/tokenizer.h b/funasr/runtime/onnxruntime/src/tokenizer.h
new file mode 100644
index 0000000..319975a
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/tokenizer.h
@@ -0,0 +1,27 @@
+#pragma once
+#include "yaml-cpp/yaml.h"
+
+class CTokenizer {
+private:
+	// Lookup tables filled by OpenYaml(); m_ready is set to true once loading succeeds.
+	bool  m_ready = false;
+	vector<string>   m_id2token,m_id2punc;
+	map<string, int>  m_token2id,m_punc2id;
+
+public:
+	// The one-argument constructor calls OpenYaml(sz_yamlfile) itself.
+	CTokenizer(const char* sz_yamlfile);
+	CTokenizer();
+	bool OpenYaml(const char* sz_yamlfile); // loads "token_list" / "punc_list"; returns false on failure
+	void ReadYaml(const YAML::Node& node); // recursively prints every scalar under node to stdout
+	vector<string> Id2String(vector<int> input); // token ids -> token strings
+	vector<int> String2Ids(vector<string> input); // lower-cases each token, then maps it to an id (UNK_CHAR's id if unknown)
+	int String2Id(string input); // single token -> id (UNK_CHAR's id if unknown)
+	vector<string> Id2Punc(vector<int> input); // punctuation ids -> punctuation strings
+	string Id2Punc(int n_punc_id); // single punctuation id -> string
+	vector<int> Punc2Ids(vector<string> input); // punctuation strings -> ids
+	vector<string> SplitChineseString(const string& str_info); // splits a UTF-8 string into single characters
+	void StrSplit(const string& str, const char split, vector<string>& res); // appends the split-separated fields of str to res
+	void Tokenize(const char* str_info, vector<string>& str_out, vector<int>& id_out); // splits text into tokens (str_out) and their ids (id_out)
+
+};
diff --git a/funasr/runtime/onnxruntime/src/util.cpp b/funasr/runtime/onnxruntime/src/util.cpp
index 5a72c72..c5c27af 100644
--- a/funasr/runtime/onnxruntime/src/util.cpp
+++ b/funasr/runtime/onnxruntime/src/util.cpp
@@ -1,7 +1,7 @@
 
 #include "precomp.h"
 
-float *loadparams(const char *filename)
+float *LoadParams(const char *filename)
 {
 
     FILE *fp;
@@ -10,20 +10,20 @@
     uint32_t nFileLen = ftell(fp);
     fseek(fp, 0, SEEK_SET);
 
-    float *params_addr = (float *)aligned_malloc(32, nFileLen);
+    float *params_addr = (float *)AlignedMalloc(32, nFileLen);
     int n = fread(params_addr, 1, nFileLen, fp);
     fclose(fp);
 
     return params_addr;
 }
 
-int val_align(int val, int align)
+int ValAlign(int val, int align)
 {
     float tmp = ceil((float)val / (float)align) * (float)align;
     return (int)tmp;
 }
 
-void disp_params(float *din, int size)
+void DispParams(float *din, int size)
 {
     int i;
     for (i = 0; i < size; i++) {
@@ -39,7 +39,7 @@
     fclose(fp);
 }
 
-void basic_norm(Tensor<float> *&din, float norm)
+void BasicNorm(Tensor<float> *&din, float norm)
 {
 
     int Tmax = din->size[2];
@@ -59,7 +59,7 @@
     }
 }
 
-void findmax(float *din, int len, float &max_val, int &max_idx)
+void FindMax(float *din, int len, float &max_val, int &max_idx)
 {
     int i;
     max_val = -INFINITY;
@@ -72,7 +72,7 @@
     }
 }
 
-string pathAppend(const string &p1, const string &p2)
+string PathAppend(const string &p1, const string &p2)
 {
 
     char sep = '/';
@@ -89,7 +89,7 @@
         return (p1 + p2);
 }
 
-void relu(Tensor<float> *din)
+void Relu(Tensor<float> *din)
 {
     int i;
     for (i = 0; i < din->buff_size; i++) {
@@ -98,7 +98,7 @@
     }
 }
 
-void swish(Tensor<float> *din)
+void Swish(Tensor<float> *din)
 {
     int i;
     for (i = 0; i < din->buff_size; i++) {
@@ -107,7 +107,7 @@
     }
 }
 
-void sigmoid(Tensor<float> *din)
+void Sigmoid(Tensor<float> *din)
 {
     int i;
     for (i = 0; i < din->buff_size; i++) {
@@ -116,7 +116,7 @@
     }
 }
 
-void doubleswish(Tensor<float> *din)
+void DoubleSwish(Tensor<float> *din)
 {
     int i;
     for (i = 0; i < din->buff_size; i++) {
@@ -125,7 +125,7 @@
     }
 }
 
-void softmax(float *din, int mask, int len)
+void Softmax(float *din, int mask, int len)
 {
     float *tmp = (float *)malloc(mask * sizeof(float));
     int i;
@@ -149,7 +149,7 @@
     }
 }
 
-void log_softmax(float *din, int len)
+void LogSoftmax(float *din, int len)
 {
     float *tmp = (float *)malloc(len * sizeof(float));
     int i;
@@ -164,7 +164,7 @@
     free(tmp);
 }
 
-void glu(Tensor<float> *din, Tensor<float> *dout)
+void Glu(Tensor<float> *din, Tensor<float> *dout)
 {
     int mm = din->buff_size / 1024;
     int i, j;
diff --git a/funasr/runtime/onnxruntime/src/util.h b/funasr/runtime/onnxruntime/src/util.h
index 48a27db..6327f7b 100644
--- a/funasr/runtime/onnxruntime/src/util.h
+++ b/funasr/runtime/onnxruntime/src/util.h
@@ -5,26 +5,26 @@
 
 using namespace std;
 
-extern float *loadparams(const char *filename);
+extern float *LoadParams(const char *filename);
 
 extern void SaveDataFile(const char *filename, void *data, uint32_t len);
-extern void relu(Tensor<float> *din);
-extern void swish(Tensor<float> *din);
-extern void sigmoid(Tensor<float> *din);
-extern void doubleswish(Tensor<float> *din);
+extern void Relu(Tensor<float> *din);
+extern void Swish(Tensor<float> *din);
+extern void Sigmoid(Tensor<float> *din);
+extern void DoubleSwish(Tensor<float> *din);
 
-extern void softmax(float *din, int mask, int len);
+extern void Softmax(float *din, int mask, int len);
 
-extern void log_softmax(float *din, int len);
-extern int val_align(int val, int align);
-extern void disp_params(float *din, int size);
+extern void LogSoftmax(float *din, int len);
+extern int ValAlign(int val, int align);
+extern void DispParams(float *din, int size);
 
-extern void basic_norm(Tensor<float> *&din, float norm);
+extern void BasicNorm(Tensor<float> *&din, float norm);
 
-extern void findmax(float *din, int len, float &max_val, int &max_idx);
+extern void FindMax(float *din, int len, float &max_val, int &max_idx);
 
-extern void glu(Tensor<float> *din, Tensor<float> *dout);
+extern void Glu(Tensor<float> *din, Tensor<float> *dout);
 
-string pathAppend(const string &p1, const string &p2);
+string PathAppend(const string &p1, const string &p2);
 
 #endif
diff --git a/funasr/runtime/onnxruntime/src/Vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
similarity index 88%
rename from funasr/runtime/onnxruntime/src/Vocab.cpp
rename to funasr/runtime/onnxruntime/src/vocab.cpp
index b54a6c6..ba041b8 100644
--- a/funasr/runtime/onnxruntime/src/Vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -1,4 +1,4 @@
-#include "Vocab.h"
+#include "vocab.h"
 #include "yaml-cpp/yaml.h"
 
 #include <fstream>
@@ -12,13 +12,13 @@
 Vocab::Vocab(const char *filename)
 {
     ifstream in(filename);
-    loadVocabFromYaml(filename);
+    LoadVocabFromYaml(filename);
 }
 Vocab::~Vocab()
 {
 }
 
-void Vocab::loadVocabFromYaml(const char* filename){
+void Vocab::LoadVocabFromYaml(const char* filename){
     YAML::Node config;
     try{
         config = YAML::LoadFile(filename);
@@ -26,72 +26,62 @@
         printf("error loading file, yaml file error or not exist.\n");
         exit(-1);
     }
-
     YAML::Node myList = config["token_list"];
     for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
         vocab.push_back(it->as<string>());
     }
 }
 
-string Vocab::vector2string(vector<int> in)
+string Vocab::Vector2String(vector<int> in)
 {
     int i;
     stringstream ss;
     for (auto it = in.begin(); it != in.end(); it++) {
         ss << vocab[*it];
     }
-
     return ss.str();
 }
 
-int str2int(string str)
+int Str2Int(string str)
 {
     const char *ch_array = str.c_str();
     if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) ||
         ((ch_array[2] & 0xc0) != 0x80))
         return 0;
-
     int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) |
               (ch_array[2] & 0x3f);
     return val;
 }
 
-bool Vocab::isChinese(string ch)
+bool Vocab::IsChinese(string ch)
 {
     if (ch.size() != 3) {
         return false;
     }
-
-    int unicode = str2int(ch);
+    int unicode = Str2Int(ch);
     if (unicode >= 19968 && unicode <= 40959) {
         return true;
     }
-
     return false;
 }
 
-string Vocab::vector2stringV2(vector<int> in)
+string Vocab::Vector2StringV2(vector<int> in)
 {
     int i;
     list<string> words;
-
     int is_pre_english = false;
     int pre_english_len = 0;
-
     int is_combining = false;
     string combine = "";
 
     for (auto it = in.begin(); it != in.end(); it++) {
         string word = vocab[*it];
-
         // step1 space character skips
         if (word == "<s>" || word == "</s>" || word == "<unk>")
             continue;
-
         // step2 combie phoneme to full word
         {
             int sub_word = !(word.find("@@") == string::npos);
-
             // process word start and middle part
             if (sub_word) {
                 combine += word.erase(word.length() - 2);
@@ -109,15 +99,13 @@
 
         // step3 process english word deal with space , turn abbreviation to upper case
         {
-
             // input word is chinese, not need process 
-            if (isChinese(word)) {
+            if (IsChinese(word)) {
                 words.push_back(word);
                 is_pre_english = false;
             }
             // input word is english word
             else {
-
                 // pre word is chinese
                 if (!is_pre_english) {
                     word[0] = word[0] - 32;
@@ -125,10 +113,8 @@
                     pre_english_len = word.size();
 
                 }
-
                 // pre word is english word
                 else {
-
                     // single letter turn to upper case
                     if (word.size() == 1) {
                         word[0] = word[0] - 32;
@@ -147,16 +133,10 @@
                         pre_english_len = word.size();
                     }
                 }
-
                 is_pre_english = true;
-
             }
         }
     }
-
-    // for (auto it = words.begin(); it != words.end(); it++) {
-    //     cout << *it << endl;
-    // }
 
     stringstream ss;
     for (auto it = words.begin(); it != words.end(); it++) {
@@ -166,7 +146,7 @@
     return ss.str();
 }
 
-int Vocab::size()
+int Vocab::Size()
 {
     return vocab.size();
 }
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
new file mode 100644
index 0000000..a3fdf65
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -0,0 +1,25 @@
+
+#ifndef VOCAB_H
+#define VOCAB_H
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+using namespace std;
+
+class Vocab {
+  private:
+    vector<string> vocab; // token strings, indexed by token id
+    bool IsChinese(string ch); // true for a 3-byte string whose decoded value lies in 19968..40959
+    bool IsEnglish(string ch); // NOTE(review): declared here; confirm a definition exists in vocab.cpp
+    void LoadVocabFromYaml(const char* filename); // fills `vocab` from the YAML "token_list"; exits the process if the file cannot be loaded
+
+  public:
+    Vocab(const char *filename); // loads the vocabulary via LoadVocabFromYaml(filename)
+    ~Vocab();
+    int Size(); // number of entries in `vocab`
+    string Vector2String(vector<int> in); // concatenates vocab[id] for every id in `in`
+    string Vector2StringV2(vector<int> in); // skips <s>, </s>, <unk>, joins "@@" sub-words and post-processes English words
+};
+
+#endif
diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
deleted file mode 100644
index e3224e3..0000000
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-if(WIN32)
-    if(CMAKE_CL_64)
-        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
-    else()
-        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
-    endif()
-endif()
-
-set(EXTRA_LIBS funasr)
-
-
-include_directories(${CMAKE_SOURCE_DIR}/include)
-set(EXECNAME "tester")
-set(EXECNAMERTF "tester_rtf")
-
-add_executable(${EXECNAME} "tester.cpp")
-target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
-
-add_executable(${EXECNAMERTF} "tester_rtf.cpp")
-target_link_libraries(${EXECNAMERTF} PUBLIC ${EXTRA_LIBS})
-
diff --git a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp b/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
deleted file mode 100644
index dd79887..0000000
--- a/funasr/runtime/onnxruntime/tester/tester_rtf.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-
-#ifndef _WIN32
-#include <sys/time.h>
-#else
-#include <win_func.h>
-#endif
-
-#include "libfunasrapi.h"
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <vector>
-using namespace std;
-
-int main(int argc, char *argv[])
-{
-
-    if (argc < 4)
-    {
-        printf("Usage: %s /path/to/model_dir /path/to/wav.scp quantize(true or false) \n", argv[0]);
-        exit(-1);
-    }
-
-    // read wav.scp
-    vector<string> wav_list;
-    ifstream in(argv[2]);
-    if (!in.is_open()) {
-        printf("Failed to open file: %s", argv[2]);
-        return 0;
-    }
-    string line;
-    while(getline(in, line))
-    {
-        istringstream iss(line);
-        string column1, column2;
-        iss >> column1 >> column2;
-        wav_list.push_back(column2); 
-    }
-    in.close();
-
-    // model init
-    struct timeval start, end;
-    gettimeofday(&start, NULL);
-    int nThreadNum = 1;
-    // is quantize
-    bool quantize = false;
-    istringstream(argv[3]) >> boolalpha >> quantize;
-
-    FUNASR_HANDLE AsrHanlde=FunASRInit(argv[1], nThreadNum, quantize);
-    if (!AsrHanlde)
-    {
-        printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
-        exit(-1);
-    }
-    gettimeofday(&end, NULL);
-    long seconds = (end.tv_sec - start.tv_sec);
-    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
-    printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
-
-    // warm up
-    for (size_t i = 0; i < 30; i++)
-    {
-        FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[0].c_str(), RASR_NONE, NULL);
-    }
-
-    // forward
-    float snippet_time = 0.0f;
-    float total_length = 0.0f;
-    long total_time = 0.0f;
-    
-    for (size_t i = 0; i < wav_list.size(); i++)
-    {
-        gettimeofday(&start, NULL);
-        FUNASR_RESULT Result=FunASRRecogFile(AsrHanlde, wav_list[i].c_str(), RASR_NONE, NULL);
-        gettimeofday(&end, NULL);
-        seconds = (end.tv_sec - start.tv_sec);
-        long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
-        total_time += taking_micros;
-
-        if(Result){
-            string msg = FunASRGetResult(Result, 0);
-            printf("Result: %s \n", msg.c_str());
-
-            snippet_time = FunASRGetRetSnippetTime(Result);
-            total_length += snippet_time;
-            FunASRFreeResult(Result);
-        }else{
-            cout <<"No return data!";
-        }
-    }
-
-    printf("total_time_wav %ld ms.\n", (long)(total_length * 1000));
-    printf("total_time_comput %ld ms.\n", total_time / 1000);
-    printf("total_rtf %05lf .\n", (double)total_time/ (total_length*1000000));
-
-    FunASRUninit(AsrHanlde);
-    return 0;
-}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format
new file mode 100644
index 0000000..c65e772
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.clang-format
@@ -0,0 +1,9 @@
+---
+BasedOnStyle: Google
+---
+Language:               Cpp
+Cpp11BracedListStyle:   true
+Standard:               Cpp11
+DerivePointerAlignment: false
+PointerAlignment:       Right
+---
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml
new file mode 100644
index 0000000..834e7d7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/linux-macos.yaml
@@ -0,0 +1,70 @@
+name: linux-macos
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: linux-macos-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  linux_macos:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Configure Cmake
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -D CMAKE_BUILD_TYPE=Release ..
+
+      - name: Build kaldi-native-fbank for ubuntu/macos
+        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
+        run: |
+          cd build
+          make -j2
+          # Tests run once, in the "Run tests" step that follows.
+
+      - name: Run tests for ubuntu/macos
+        if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos')
+        run: |
+          cd build
+          ctest --output-on-failure
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install wheel twine setuptools
+
+      - name: Build Python
+        shell: bash
+        run: |
+          python3 setup.py install
+
+      - name: Display Python package version
+        shell: bash
+        run: |
+          python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml
new file mode 100644
index 0000000..0939147
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x64.yaml
@@ -0,0 +1,97 @@
+name: windows-x64
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: windows-x64-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  windows_x64:
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.vs-version }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - vs-version: vs2015
+            toolset-version: v140
+            os: windows-2019
+
+          - vs-version: vs2017
+            toolset-version: v141
+            os: windows-2019
+
+          - vs-version: vs2019
+            toolset-version: v142
+            os: windows-2022
+
+          - vs-version: vs2022
+            toolset-version: v143
+            os: windows-2022
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Configure CMake
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -T ${{ matrix.toolset-version}},host=x64 -D CMAKE_BUILD_TYPE=Release ..
+
+
+      - name: Build kaldi-native-fbank for windows
+        shell: bash
+        run: |
+          cd build
+          cmake --build . --target ALL_BUILD --config Release
+          cat CMakeCache.txt
+
+      - name: Run tests for windows
+        shell: bash
+        run: |
+          cd build
+          ctest --verbose -C Release --output-on-failure -E py
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          export KALDI_NATIVE_FBANK_CMAKE_ARGS="-T ${{ matrix.toolset-version}},host=x64 -A x64"
+          python3 -m pip install --upgrade pip pytest
+          python3 -m pip install wheel twine setuptools
+
+      - name: Build Python
+        shell: bash
+        run: |
+          export KALDI_NATIVE_FBANK_CMAKE_ARGS="-T ${{ matrix.toolset-version}},host=x64 -A x64"
+          python3 setup.py install
+
+      - name: Display Python package version
+        shell: bash
+        run: |
+          python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
+
+      - name: Run Python tests
+        shell: bash
+        run: |
+          cd kaldi-native-fbank/python/tests
+          python3 ./test_fbank_options.py
+          python3 ./test_frame_extraction_options.py
+          python3 ./test_mel_bank_options.py
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml
new file mode 100644
index 0000000..2808702
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/.github/workflows/windows-x86.yaml
@@ -0,0 +1,67 @@
+name: windows-x86
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: windows-x86-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  windows_x86:
+    runs-on: ${{ matrix.os }}
+    name: ${{ matrix.vs-version }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - vs-version: vs2015
+            toolset-version: v140
+            os: windows-2019
+
+          - vs-version: vs2017
+            toolset-version: v141
+            os: windows-2019
+
+          - vs-version: vs2019
+            toolset-version: v142
+            os: windows-2022
+
+          - vs-version: vs2022
+            toolset-version: v143
+            os: windows-2022
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Configure CMake
+        shell: bash
+        run: |
+          mkdir build
+          cd build
+          cmake -T ${{ matrix.toolset-version}},host=x64 -A Win32 -D CMAKE_BUILD_TYPE=Release -DKALDI_NATIVE_FBANK_BUILD_PYTHON=OFF ..
+          cat CMakeCache.txt
+
+
+      - name: Build kaldi-native-fbank for windows
+        shell: bash
+        run: |
+          cd build
+          cmake --build . --target ALL_BUILD --config Release
+
+      - name: Run tests for windows
+        shell: bash
+        run: |
+          cd build
+
+          ctest --verbose -C Release --output-on-failure -E py
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt
new file mode 100644
index 0000000..e931a03
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/CMakeLists.txt
@@ -0,0 +1,129 @@
+# cmake_minimum_required() has to come first: it establishes the policy
+# defaults that every later command, including if(), is evaluated under.
+cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
+
+# Refuse in-source builds so generated files never mix with the sources.
+if("x${CMAKE_SOURCE_DIR}" STREQUAL "x${CMAKE_BINARY_DIR}")
+  message(FATAL_ERROR "\
+In-source build is not a good practice.
+Please use:
+  mkdir build
+  cd build
+  cmake ..
+to build this project"
+  )
+endif()
+
+# Defaults for the BUILD_PYTHON / BUILD_TESTS options declared below: both
+# are OFF when a toolchain file is in use and ON otherwise.
+if(CMAKE_TOOLCHAIN_FILE)
+  set(_BUILD_PYTHON OFF)
+  set(_BUILD_TESTS OFF)
+else()
+  set(_BUILD_PYTHON ON)
+  set(_BUILD_TESTS ON)
+endif()
+
+# CMP0057 turns on the IN_LIST operator of if().
+if(POLICY CMP0057)
+  cmake_policy(SET CMP0057 NEW)
+endif()
+
+project(kaldi-native-fbank CXX C)
+
+set(KALDI_NATIVE_FBANK_VERSION "1.13")
+
+# Fall back to a Release build when no build type was requested.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE Release)
+endif()
+
+message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+
+# Libraries go to <build>/lib, executables to <build>/bin.
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+
+set(CMAKE_SKIP_BUILD_RPATH FALSE)
+# NOTE(review): CMake itself reads CMAKE_BUILD_RPATH_USE_ORIGIN; this
+# unprefixed variable looks like a no-op. Confirm the intent before renaming.
+set(BUILD_RPATH_USE_ORIGIN TRUE)
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+
+# RPATH entry meaning "the directory of the binary being loaded".
+if(NOT APPLE)
+  set(kaldi_native_fbank_rpath_origin "$ORIGIN")
+else()
+  set(kaldi_native_fbank_rpath_origin "@loader_path")
+endif()
+
+set(CMAKE_INSTALL_RPATH ${kaldi_native_fbank_rpath_origin})
+set(CMAKE_BUILD_RPATH ${kaldi_native_fbank_rpath_origin})
+
+set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
+
+# Build shared libraries unless the caller already decided.
+if(NOT DEFINED BUILD_SHARED_LIBS)
+  set(BUILD_SHARED_LIBS ON)
+endif()
+message(STATUS "BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
+
+option(KALDI_NATIVE_FBANK_BUILD_TESTS "Whether to build tests or not" ${_BUILD_TESTS})
+option(KALDI_NATIVE_FBANK_BUILD_PYTHON "Whether to build Python extension" ${_BUILD_PYTHON})
+option(KALDI_NATIVE_FBANK_ENABLE_CHECK "Whether to build with log" OFF)
+
+message(STATUS "KALDI_NATIVE_FBANK_BUILD_TESTS: ${KALDI_NATIVE_FBANK_BUILD_TESTS}")
+message(STATUS "KALDI_NATIVE_FBANK_BUILD_PYTHON: ${KALDI_NATIVE_FBANK_BUILD_PYTHON}")
+message(STATUS "KALDI_NATIVE_FBANK_ENABLE_CHECK: ${KALDI_NATIVE_FBANK_ENABLE_CHECK}")
+
+# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps these paths inside this
+# project even when it is added to another build with add_subdirectory().
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+if(WIN32)
+  add_definitions(-DNOMINMAX) # Otherwise, std::max() and std::min() won't work
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_PYTHON)
+  include(pybind11)
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  enable_testing()
+  include(googletest)
+endif()
+
+if(NOT CMAKE_INSTALL_PREFIX)
+  set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install")
+endif()
+
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
+
+# Probe for optional headers; results land in the KNF_HAVE_* variables.
+include(CheckIncludeFileCXX)
+check_include_file_cxx(cxxabi.h KNF_HAVE_CXXABI_H)
+check_include_file_cxx(execinfo.h KNF_HAVE_EXECINFO_H)
+
+include_directories(${PROJECT_SOURCE_DIR})
+
+if(WIN32 AND MSVC)
+  # disable various warnings for MSVC
+  # 4244: '=': conversion from 'double' to 'float', possible loss of data
+  # 4267: 'return': conversion from 'size_t' to 'int32_t', possible loss of data
+  # 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible or deleted
+  set(disabled_warnings
+      /wd4244
+      /wd4267
+      /wd4624
+  )
+  message(STATUS "Disabled warnings: ${disabled_warnings}")
+  foreach(w IN LISTS disabled_warnings)
+    # string(APPEND) needs CMake 3.4, but this file only requires 3.3.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${w} ")
+  endforeach()
+endif()
+
+add_subdirectory(kaldi-native-fbank)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE
new file mode 100644
index 0000000..ee06cfc
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/LICENSE
@@ -0,0 +1,211 @@
+
+                                 Legal Notices
+
+   NOTE (this is not from the Apache License): The copyright model is that
+   authors (or their employers, if noted in individual files) own their
+   individual contributions. The authors' contributions can be discerned
+   from the git history.
+
+ -------------------------------------------------------------------------
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in
new file mode 100644
index 0000000..ae2b482
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/MANIFEST.in
@@ -0,0 +1,5 @@
+include LICENSE
+include README.md
+include CMakeLists.txt
+recursive-include kaldi-native-fbank *.*
+recursive-include cmake *.*
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md
new file mode 100644
index 0000000..2c874c1
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/README.md
@@ -0,0 +1,106 @@
+# Introduction
+
+Kaldi-compatible online fbank feature extractor without external dependencies.
+
+Tested on the following architectures and operating systems:
+
+  - Linux
+  - macOS
+  - Windows
+  - Android
+  - x86
+  - arm
+  - aarch64
+
+# Usage
+
+See the following CMake-based speech recognition (i.e., speech-to-text) projects
+for its usage:
+
+- <https://github.com/k2-fsa/sherpa-ncnn>
+  - Specifically, please have a look at <https://github.com/k2-fsa/sherpa-ncnn/blob/master/sherpa-ncnn/csrc/features.h>
+- <https://github.com/k2-fsa/sherpa-onnx>
+
+They use `kaldi-native-fbank` to compute fbank features for **real-time**
+speech recognition.
+
+# Python APIs
+
+First, please install `kaldi-native-fbank` by
+
+```bash
+git clone https://github.com/csukuangfj/kaldi-native-fbank
+cd kaldi-native-fbank
+python3 setup.py install
+```
+
+or use
+
+```bash
+pip install kaldi-native-fbank
+```
+
+To check that you have installed `kaldi-native-fbank` successfully, please use
+
+```
+python3 -c "import kaldi_native_fbank; print(kaldi_native_fbank.__version__)"
+```
+
+which should print the version you have installed.
+
+Please refer to
+<https://github.com/csukuangfj/kaldi-native-fbank/blob/master/kaldi-native-fbank/python/tests/test_online_fbank.py>
+for usage.
+
+For easier reference, we post the above file below:
+
+```python3
+#!/usr/bin/env python3
+
+import sys
+
+try:
+    import kaldifeat
+except:
+    print("Please install kaldifeat first")
+    sys.exit(0)
+
+import kaldi_native_fbank as knf
+import torch
+
+
+def main():
+    sampling_rate = 16000
+    samples = torch.randn(16000 * 10)
+
+    opts = kaldifeat.FbankOptions()
+    opts.frame_opts.dither = 0
+    opts.mel_opts.num_bins = 80
+    opts.frame_opts.snip_edges = False
+    opts.mel_opts.debug_mel = False
+
+    online_fbank = kaldifeat.OnlineFbank(opts)
+
+    online_fbank.accept_waveform(sampling_rate, samples)
+
+    opts = knf.FbankOptions()
+    opts.frame_opts.dither = 0
+    opts.mel_opts.num_bins = 80
+    opts.frame_opts.snip_edges = False
+    opts.mel_opts.debug_mel = False
+
+    fbank = knf.OnlineFbank(opts)
+    fbank.accept_waveform(sampling_rate, samples.tolist())
+
+    assert online_fbank.num_frames_ready == fbank.num_frames_ready
+    for i in range(fbank.num_frames_ready):
+        f1 = online_fbank.get_frame(i)
+        f2 = torch.from_numpy(fbank.get_frame(i))
+        assert torch.allclose(f1, f2, atol=1e-3), (i, (f1 - f2).abs().max())
+
+
+if __name__ == "__main__":
+    torch.manual_seed(20220825)
+    main()
+    print("success")
+```
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake
new file mode 100644
index 0000000..98cdf6c
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent.cmake
@@ -0,0 +1,916 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FetchContent
+------------------
+
+.. only:: html
+
+  .. contents::
+
+Overview
+^^^^^^^^
+
+This module enables populating content at configure time via any method
+supported by the :module:`ExternalProject` module.  Whereas
+:command:`ExternalProject_Add` downloads at build time, the
+``FetchContent`` module makes content available immediately, allowing the
+configure step to use the content in commands like :command:`add_subdirectory`,
+:command:`include` or :command:`file` operations.
+
+Content population details would normally be defined separately from the
+command that performs the actual population.  Projects should also
+check whether the content has already been populated somewhere else in the
+project hierarchy.  Typical usage would look something like this:
+
+.. code-block:: cmake
+
+  FetchContent_Declare(
+    googletest
+    GIT_REPOSITORY https://github.com/google/googletest.git
+    GIT_TAG        release-1.8.0
+  )
+
+  FetchContent_GetProperties(googletest)
+  if(NOT googletest_POPULATED)
+    FetchContent_Populate(googletest)
+    add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR})
+  endif()
+
+When using the above pattern with a hierarchical project arrangement,
+projects at higher levels in the hierarchy are able to define or override
+the population details of content specified anywhere lower in the project
+hierarchy.  The ability to detect whether content has already been
+populated ensures that even if multiple child projects want certain content
+to be available, the first one to populate it wins.  The other child project
+can simply make use of the already available content instead of repeating
+the population for itself.  See the
+:ref:`Examples <fetch-content-examples>` section which demonstrates
+this scenario.
+
+The ``FetchContent`` module also supports defining and populating
+content in a single call, with no check for whether the content has been
+populated elsewhere in the project already.  This is a more low level
+operation and would not normally be the way the module is used, but it is
+sometimes useful as part of implementing some higher level feature or to
+populate some content in CMake's script mode.
+
+
+Declaring Content Details
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_Declare
+
+  .. code-block:: cmake
+
+    FetchContent_Declare(<name> <contentOptions>...)
+
+  The ``FetchContent_Declare()`` function records the options that describe
+  how to populate the specified content, but if such details have already
+  been recorded earlier in this project (regardless of where in the project
+  hierarchy), this and all later calls for the same content ``<name>`` are
+  ignored.  This "first to record, wins" approach is what allows hierarchical
+  projects to have parent projects override content details of child projects.
+
+  The content ``<name>`` can be any string without spaces, but good practice
+  would be to use only letters, numbers and underscores.  The name will be
+  treated case-insensitively and it should be obvious for the content it
+  represents, often being the name of the child project or the value given
+  to its top level :command:`project` command (if it is a CMake project).
+  For well-known public projects, the name should generally be the official
+  name of the project.  Choosing an unusual name makes it unlikely that other
+  projects needing that same content will use the same name, leading to
+  the content being populated multiple times.
+
+  The ``<contentOptions>`` can be any of the download or update/patch options
+  that the :command:`ExternalProject_Add` command understands.  The configure,
+  build, install and test steps are explicitly disabled and therefore options
+  related to them will be ignored.  In most cases, ``<contentOptions>`` will
+  just be a couple of options defining the download method and method-specific
+  details like a commit tag or archive hash.  For example:
+
+  .. code-block:: cmake
+
+    FetchContent_Declare(
+      googletest
+      GIT_REPOSITORY https://github.com/google/googletest.git
+      GIT_TAG        release-1.8.0
+    )
+
+    FetchContent_Declare(
+      myCompanyIcons
+      URL      https://intranet.mycompany.com/assets/iconset_1.12.tar.gz
+      URL_HASH MD5=5588a7b18261c20068beabfb4f530b87
+    )
+
+    FetchContent_Declare(
+      myCompanyCertificates
+      SVN_REPOSITORY svn+ssh://svn.mycompany.com/srv/svn/trunk/certs
+      SVN_REVISION   -r12345
+    )
+
+Populating The Content
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_Populate
+
+  .. code-block:: cmake
+
+    FetchContent_Populate( <name> )
+
+  In most cases, the only argument given to ``FetchContent_Populate()`` is the
+  ``<name>``.  When used this way, the command assumes the content details have
+  been recorded by an earlier call to :command:`FetchContent_Declare`.  The
+  details are stored in a global property, so they are unaffected by things
+  like variable or directory scope.  Therefore, it doesn't matter where in the
+  project the details were previously declared, as long as they have been
+  declared before the call to ``FetchContent_Populate()``.  Those saved details
+  are then used to construct a call to :command:`ExternalProject_Add` in a
+  private sub-build to perform the content population immediately.  The
+  implementation of ``ExternalProject_Add()`` ensures that if the content has
+  already been populated in a previous CMake run, that content will be reused
+  rather than being repopulated.  For the common case where population
+  involves downloading content, the cost of the download is only paid once.
+
+  An internal global property records when a particular content population
+  request has been processed.  If ``FetchContent_Populate()`` is called more
+  than once for the same content name within a configure run, the second call
+  will halt with an error.  Projects can and should check whether content
+  population has already been processed with the
+  :command:`FetchContent_GetProperties` command before calling
+  ``FetchContent_Populate()``.
+
+  ``FetchContent_Populate()`` will set three variables in the scope of the
+  caller; ``<lcName>_POPULATED``, ``<lcName>_SOURCE_DIR`` and
+  ``<lcName>_BINARY_DIR``, where ``<lcName>`` is the lowercased ``<name>``.
+  ``<lcName>_POPULATED`` will always be set to ``True`` by the call.
+  ``<lcName>_SOURCE_DIR`` is the location where the
+  content can be found upon return (it will have already been populated), while
+  ``<lcName>_BINARY_DIR`` is a directory intended for use as a corresponding
+  build directory.  The main use case for the two directory variables is to
+  call :command:`add_subdirectory` immediately after population, i.e.:
+
+  .. code-block:: cmake
+
+    FetchContent_Populate(FooBar ...)
+    add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
+
+  The values of the three variables can also be retrieved from anywhere in the
+  project hierarchy using the :command:`FetchContent_GetProperties` command.
+
+  A number of cache variables influence the behavior of all content population
+  performed using details saved from a :command:`FetchContent_Declare` call:
+
+  ``FETCHCONTENT_BASE_DIR``
+    In most cases, the saved details do not specify any options relating to the
+    directories to use for the internal sub-build, final source and build areas.
+    It is generally best to leave these decisions up to the ``FetchContent``
+    module to handle on the project's behalf.  The ``FETCHCONTENT_BASE_DIR``
+    cache variable controls the point under which all content population
+    directories are collected, but in most cases developers would not need to
+    change this.  The default location is ``${CMAKE_BINARY_DIR}/_deps``, but if
+    developers change this value, they should aim to keep the path short and
+    just below the top level of the build tree to avoid running into path
+    length problems on Windows.
+
+  ``FETCHCONTENT_QUIET``
+    The logging output during population can be quite verbose, making the
+    configure stage quite noisy.  This cache option (``ON`` by default) hides
+    all population output unless an error is encountered.  If experiencing
+    problems with hung downloads, temporarily switching this option off may
+    help diagnose which content population is causing the issue.
+
+  ``FETCHCONTENT_FULLY_DISCONNECTED``
+    When this option is enabled, no attempt is made to download or update
+    any content.  It is assumed that all content has already been populated in
+    a previous run or the source directories have been pointed at existing
+    contents the developer has provided manually (using options described
+    further below).  When the developer knows that no changes have been made to
+    any content details, turning this option ``ON`` can significantly speed up
+    the configure stage.  It is ``OFF`` by default.
+
+  ``FETCHCONTENT_UPDATES_DISCONNECTED``
+    This is a less severe download/update control compared to
+    ``FETCHCONTENT_FULLY_DISCONNECTED``.  Instead of bypassing all download and
+    update logic, the ``FETCHCONTENT_UPDATES_DISCONNECTED`` only disables the
+    update stage.  Therefore, if content has not been downloaded previously,
+    it will still be downloaded when this option is enabled.  This can speed up
+    the configure stage, but not as much as
+    ``FETCHCONTENT_FULLY_DISCONNECTED``.  It is ``OFF`` by default.
+
+  In addition to the above cache variables, the following cache variables are
+  also defined for each content name (``<ucName>`` is the uppercased value of
+  ``<name>``):
+
+  ``FETCHCONTENT_SOURCE_DIR_<ucName>``
+    If this is set, no download or update steps are performed for the specified
+    content and the ``<lcName>_SOURCE_DIR`` variable returned to the caller is
+    pointed at this location.  This gives developers a way to have a separate
+    checkout of the content that they can modify freely without interference
+    from the build.  The build simply uses that existing source, but it still
+    defines ``<lcName>_BINARY_DIR`` to point inside its own build area.
+    Developers are strongly encouraged to use this mechanism rather than
+    editing the sources populated in the default location, as changes to
+    sources in the default location can be lost when content population details
+    are changed by the project.
+
+  ``FETCHCONTENT_UPDATES_DISCONNECTED_<ucName>``
+    This is the per-content equivalent of
+    ``FETCHCONTENT_UPDATES_DISCONNECTED``. If the global option or this option
+    is ``ON``, then updates will be disabled for the named content.
+    Disabling updates for individual content can be useful for content whose
+    details rarely change, while still leaving other frequently changing
+    content with updates enabled.
+
+
+  The ``FetchContent_Populate()`` command also supports a syntax allowing the
+  content details to be specified directly rather than using any saved
+  details.  This is more low-level and use of this form is generally to be
+  avoided in favour of using saved content details as outlined above.
+  Nevertheless, in certain situations it can be useful to invoke the content
+  population as an isolated operation (typically as part of implementing some
+  other higher level feature or when using CMake in script mode):
+
+  .. code-block:: cmake
+
+    FetchContent_Populate( <name>
+      [QUIET]
+      [SUBBUILD_DIR <subBuildDir>]
+      [SOURCE_DIR <srcDir>]
+      [BINARY_DIR <binDir>]
+      ...
+    )
+
+  This form has a number of key differences to that where only ``<name>`` is
+  provided:
+
+  - All required population details are assumed to have been provided directly
+    in the call to ``FetchContent_Populate()``. Any saved details for
+    ``<name>`` are ignored.
+  - No check is made for whether content for ``<name>`` has already been
+    populated.
+  - No global property is set to record that the population has occurred.
+  - No global properties record the source or binary directories used for the
+    populated content.
+  - The ``FETCHCONTENT_FULLY_DISCONNECTED`` and
+    ``FETCHCONTENT_UPDATES_DISCONNECTED`` cache variables are ignored.
+
+  The ``<lcName>_SOURCE_DIR`` and ``<lcName>_BINARY_DIR`` variables are still
+  returned to the caller, but since these locations are not stored as global
+  properties when this form is used, they are only available to the calling
+  scope and below rather than the entire project hierarchy.  No
+  ``<lcName>_POPULATED`` variable is set in the caller's scope with this form.
+
+  The supported options for ``FetchContent_Populate()`` are the same as those
+  for :command:`FetchContent_Declare()`.  Those few options shown just
+  above are either specific to ``FetchContent_Populate()`` or their behavior is
+  slightly modified from how :command:`ExternalProject_Add` treats them.
+
+  ``QUIET``
+    The ``QUIET`` option can be given to hide the output associated with
+    populating the specified content.  If the population fails, the output will
+    be shown regardless of whether this option was given or not so that the
+    cause of the failure can be diagnosed.  The global ``FETCHCONTENT_QUIET``
+    cache variable has no effect on ``FetchContent_Populate()`` calls where the
+    content details are provided directly.
+
+  ``SUBBUILD_DIR``
+    The ``SUBBUILD_DIR`` argument can be provided to change the location of the
+    sub-build created to perform the population.  The default value is
+    ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-subbuild`` and it would be unusual
+    to need to override this default.  If a relative path is specified, it will
+    be interpreted as relative to :variable:`CMAKE_CURRENT_BINARY_DIR`.
+
+  ``SOURCE_DIR``, ``BINARY_DIR``
+    The ``SOURCE_DIR`` and ``BINARY_DIR`` arguments are supported by
+    :command:`ExternalProject_Add`, but different default values are used by
+    ``FetchContent_Populate()``.  ``SOURCE_DIR`` defaults to
+    ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-src`` and ``BINARY_DIR`` defaults to
+    ``${CMAKE_CURRENT_BINARY_DIR}/<lcName>-build``.  If a relative path is
+    specified, it will be interpreted as relative to
+    :variable:`CMAKE_CURRENT_BINARY_DIR`.
+
+  In addition to the above explicit options, any other unrecognized options are
+  passed through unmodified to :command:`ExternalProject_Add` to perform the
+  download, patch and update steps.  The following options are explicitly
+  prohibited (they are disabled by the ``FetchContent_Populate()`` command):
+
+  - ``CONFIGURE_COMMAND``
+  - ``BUILD_COMMAND``
+  - ``INSTALL_COMMAND``
+  - ``TEST_COMMAND``
+
+  If using ``FetchContent_Populate()`` within CMake's script mode, be aware
+  that the implementation sets up a sub-build which therefore requires a CMake
+  generator and build tool to be available. If these cannot be found by
+  default, then the :variable:`CMAKE_GENERATOR` and/or
+  :variable:`CMAKE_MAKE_PROGRAM` variables will need to be set appropriately
+  on the command line invoking the script.
+
+
+Retrieve Population Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. command:: FetchContent_GetProperties
+
+  When using saved content details, a call to :command:`FetchContent_Populate`
+  records information in global properties which can be queried at any time.
+  This information includes the source and binary directories associated with
+  the content and also whether or not the content population has been processed
+  during the current configure run.
+
+  .. code-block:: cmake
+
+    FetchContent_GetProperties( <name>
+      [SOURCE_DIR <srcDirVar>]
+      [BINARY_DIR <binDirVar>]
+      [POPULATED <doneVar>]
+    )
+
+  The ``SOURCE_DIR``, ``BINARY_DIR`` and ``POPULATED`` options can be used to
+  specify which properties should be retrieved.  Each option accepts a value
+  which is the name of the variable in which to store that property.  Most of
+  the time though, only ``<name>`` is given, in which case the call will then
+  set the same variables as a call to
+  :command:`FetchContent_Populate(name) <FetchContent_Populate>`.  This allows
+  the following canonical pattern to be used, which ensures that the relevant
+  variables will always be defined regardless of whether or not the population
+  has been performed elsewhere in the project already:
+
+  .. code-block:: cmake
+
+    FetchContent_GetProperties(foobar)
+    if(NOT foobar_POPULATED)
+      FetchContent_Populate(foobar)
+
+      # Set any custom variables, etc. here, then
+      # populate the content as part of this build
+
+      add_subdirectory(${foobar_SOURCE_DIR} ${foobar_BINARY_DIR})
+    endif()
+
+  The above pattern allows other parts of the overall project hierarchy to
+  re-use the same content and ensure that it is only populated once.
+
+
+.. _`fetch-content-examples`:
+
+Examples
+^^^^^^^^
+
+Consider a project hierarchy where ``projA`` is the top level project and it
+depends on projects ``projB`` and ``projC``. Both ``projB`` and ``projC``
+can be built standalone and they also both depend on another project
+``projD``.  For simplicity, this example will assume that all four projects
+are available on a company git server.  The ``CMakeLists.txt`` of each project
+might have sections like the following:
+
+*projA*:
+
+.. code-block:: cmake
+
+  include(FetchContent)
+  FetchContent_Declare(
+    projB
+    GIT_REPOSITORY git@mycompany.com:git/projB.git
+    GIT_TAG        4a89dc7e24ff212a7b5167bef7ab079d
+  )
+  FetchContent_Declare(
+    projC
+    GIT_REPOSITORY git@mycompany.com:git/projC.git
+    GIT_TAG        4ad4016bd1d8d5412d135cf8ceea1bb9
+  )
+  FetchContent_Declare(
+    projD
+    GIT_REPOSITORY git@mycompany.com:git/projD.git
+    GIT_TAG        origin/integrationBranch
+  )
+
+  FetchContent_GetProperties(projB)
+  if(NOT projb_POPULATED)
+    FetchContent_Populate(projB)
+    add_subdirectory(${projb_SOURCE_DIR} ${projb_BINARY_DIR})
+  endif()
+
+  FetchContent_GetProperties(projC)
+  if(NOT projc_POPULATED)
+    FetchContent_Populate(projC)
+    add_subdirectory(${projc_SOURCE_DIR} ${projc_BINARY_DIR})
+  endif()
+
+*projB*:
+
+.. code-block:: cmake
+
+  include(FetchContent)
+  FetchContent_Declare(
+    projD
+    GIT_REPOSITORY git@mycompany.com:git/projD.git
+    GIT_TAG        20b415f9034bbd2a2e8216e9a5c9e632
+  )
+
+  FetchContent_GetProperties(projD)
+  if(NOT projd_POPULATED)
+    FetchContent_Populate(projD)
+    add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
+  endif()
+
+
+*projC*:
+
+.. code-block:: cmake
+
+  include(FetchContent)
+  FetchContent_Declare(
+    projD
+    GIT_REPOSITORY git@mycompany.com:git/projD.git
+    GIT_TAG        7d9a17ad2c962aa13e2fbb8043fb6b8a
+  )
+
+  FetchContent_GetProperties(projD)
+  if(NOT projd_POPULATED)
+    FetchContent_Populate(projD)
+    add_subdirectory(${projd_SOURCE_DIR} ${projd_BINARY_DIR})
+  endif()
+
+A few key points should be noted in the above:
+
+- ``projB`` and ``projC`` define different content details for ``projD``,
+  but ``projA`` also defines a set of content details for ``projD`` and
+  because ``projA`` will define them first, the details from ``projB`` and
+  ``projC`` will not be used.  The override details defined by ``projA``
+  are not required to match either of those from ``projB`` or ``projC``, but
+  it is up to the higher level project to ensure that the details it does
+  define still make sense for the child projects.
+- While ``projA`` defined content details for ``projD``, it did not need
+  to explicitly call ``FetchContent_Populate(projD)`` itself.  Instead, it
+  leaves that to a child project to do (in this case it will be ``projB``
+  since it is added to the build ahead of ``projC``).  If ``projA`` needed to
+  customize how the ``projD`` content was brought into the build as well
+  (e.g. define some CMake variables before calling
+  :command:`add_subdirectory` after populating), it would do the call to
+  ``FetchContent_Populate()``, etc. just as it did for the ``projB`` and
+  ``projC`` content.  For higher level projects, it is usually enough to
+  just define the override content details and leave the actual population
+  to the child projects.  This saves repeating the same thing at each level
+  of the project hierarchy unnecessarily.
+- Even though ``projA`` is the top level project in this example, it still
+  checks whether ``projB`` and ``projC`` have already been populated before
+  going ahead to do those populations.  This makes ``projA`` able to be more
+  easily incorporated as a child of some other higher level project in the
+  future if required.  Always protect a call to
+  :command:`FetchContent_Populate` with a check to
+  :command:`FetchContent_GetProperties`, even in what may be considered a top
+  level project at the time.
+
+
+The following example demonstrates how one might download and unpack a
+firmware tarball using CMake's :manual:`script mode <cmake(1)>`.  The call to
+:command:`FetchContent_Populate` specifies all the content details and the
+unpacked firmware will be placed in a ``firmware`` directory below the
+current working directory.
+
+*getFirmware.cmake*:
+
+.. code-block:: cmake
+
+  # NOTE: Intended to be run in script mode with cmake -P
+  include(FetchContent)
+  FetchContent_Populate(
+    firmware
+    URL        https://mycompany.com/assets/firmware-1.23-arm.tar.gz
+    URL_HASH   MD5=68247684da89b608d466253762b0ff11
+    SOURCE_DIR firmware
+  )
+
+#]=======================================================================]
+
+
+# Directory holding this module's private support files, located next to
+# the module itself. The sub-build template CMakeLists.cmake.in used by
+# __FetchContent_directPopulate() is read from here.
+set(__FetchContent_privateDir "${CMAKE_CURRENT_LIST_DIR}/FetchContent")
+
+#=======================================================================
+# Recording and retrieving content details for later population
+#=======================================================================
+
+# Internal use, projects must not call this directly. It is
+# intended for use by FetchContent_Declare() only.
+#
+# Sets a content-specific global property (not meant for use
+# outside of functions defined here in this file) which can later
+# be retrieved using __FetchContent_getSavedDetails() with just the
+# same content name. If there is already a value stored in the
+# property, it is left unchanged and this call has no effect.
+# This allows parent projects to define the content details,
+# overriding anything a child project may try to set (properties
+# are not cached between runs, so the first thing to set it in a
+# build will be in control).
+function(__FetchContent_declareDetails contentName)
+
+  # Content names are treated case-insensitively, so the global property
+  # is keyed on the lowercased name.
+  string(TOLOWER ${contentName} lcName)
+  set(detailsProp "_FetchContent_${lcName}_savedDetails")
+
+  # First declaration wins: once the property has been defined, any later
+  # call for the same content returns without touching the stored details.
+  get_property(detailsRecorded GLOBAL PROPERTY ${detailsProp} DEFINED)
+  if(detailsRecorded)
+    return()
+  endif()
+
+  # Define the property explicitly so that the DEFINED query above can
+  # detect it on subsequent calls, then store every remaining argument
+  # as the saved details.
+  define_property(GLOBAL PROPERTY ${detailsProp}
+    BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+    FULL_DOCS  "Details used by FetchContent_Populate() for ${contentName}"
+  )
+  set_property(GLOBAL PROPERTY ${detailsProp} ${ARGN})
+
+endfunction()
+
+
+# Internal use, projects must not call this directly. It is
+# intended for use by the FetchContent_Populate() function.
+#
+# Retrieves details saved for the specified content in an
+# earlier call to __FetchContent_declareDetails().
+function(__FetchContent_getSavedDetails contentName outVar)
+
+  # Same property naming scheme as __FetchContent_declareDetails():
+  # keyed on the lowercased content name.
+  string(TOLOWER ${contentName} lcName)
+  set(detailsProp "_FetchContent_${lcName}_savedDetails")
+
+  # The property only exists if details were declared earlier; asking
+  # for details that were never recorded is a hard error.
+  get_property(detailsRecorded GLOBAL PROPERTY ${detailsProp} DEFINED)
+  if(NOT detailsRecorded)
+    message(FATAL_ERROR "No content details recorded for ${contentName}")
+  endif()
+
+  # Hand the stored list back to the caller as a single quoted value so
+  # it arrives in <outVar> unchanged.
+  get_property(savedDetails GLOBAL PROPERTY ${detailsProp})
+  set(${outVar} "${savedDetails}" PARENT_SCOPE)
+
+endfunction()
+
+
+# Saves population details of the content, sets defaults for the
+# SOURCE_DIR and BINARY_DIR.
+function(FetchContent_Declare contentName)
+
+  # SVN_REPOSITORY is the only option inspected here. Everything else
+  # ends up in ARG_UNPARSED_ARGUMENTS and is stored as given.
+  cmake_parse_arguments(ARG "" "SVN_REPOSITORY" "" ${ARGN})
+
+  if(ARG_SVN_REPOSITORY)
+    # Make the source directory name depend on the svn repository URL by
+    # appending a hash of it. Without this, changing the URL would make
+    # the download fail, because it would try to checkout/update instead
+    # of switching the old URL to the new one. Only the first 7
+    # characters of the hash are kept so the path stays short (long paths
+    # can be a problem on windows due to path length limits).
+    string(SHA1 repoUrlHash ${ARG_SVN_REPOSITORY})
+    string(SUBSTRING ${repoUrlHash} 0 7 shortUrlHash)
+    set(sourceDirSuffix "-${shortUrlHash}")
+    set(svnDetails SVN_REPOSITORY ${ARG_SVN_REPOSITORY})
+  else()
+    # Not an svn download: make sure neither helper variable contributes
+    # anything below.
+    unset(sourceDirSuffix)
+    unset(svnDetails)
+  endif()
+
+  string(TOLOWER ${contentName} lcName)
+
+  # Default directories come first; the caller's own options are listed
+  # last so that they can override the defaults set here.
+  __FetchContent_declareDetails(
+    ${lcName}
+    SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${lcName}-src${sourceDirSuffix}"
+    BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${lcName}-build"
+    ${svnDetails}
+    ${ARG_UNPARSED_ARGUMENTS}
+  )
+
+endfunction()
+
+
+#=======================================================================
+# Set/get whether the specified content has been populated yet.
+# The setter also records the source and binary dirs used.
+#=======================================================================
+
+# Internal use, projects must not call this directly. It is
+# intended for use by the FetchContent_Populate() function to
+# record when FetchContent_Populate() is called for a particular
+# content name.
+function(__FetchContent_setPopulated contentName sourceDir binaryDir)
+
+  string(TOLOWER ${contentName} lcName)
+  set(propBase "_FetchContent_${lcName}")
+
+  # The three bookkeeping properties share identical documentation
+  # strings, so define them all in one pass before assigning values.
+  foreach(propSuffix IN ITEMS sourceDir binaryDir populated)
+    define_property(GLOBAL PROPERTY ${propBase}_${propSuffix}
+      BRIEF_DOCS "Internal implementation detail of FetchContent_Populate()"
+      FULL_DOCS  "Details used by FetchContent_Populate() for ${contentName}"
+    )
+  endforeach()
+
+  # Record where the content lives and where it should be built.
+  set_property(GLOBAL PROPERTY ${propBase}_sourceDir ${sourceDir})
+  set_property(GLOBAL PROPERTY ${propBase}_binaryDir ${binaryDir})
+
+  # Marker that population has been processed for this content name.
+  set_property(GLOBAL PROPERTY ${propBase}_populated True)
+
+endfunction()
+
+
+# Set variables in the calling scope for any of the retrievable
+# properties. If no specific properties are requested, variables
+# will be set for all retrievable properties.
+#
+# This function is intended to also be used by projects as the canonical
+# way to detect whether they should call FetchContent_Populate()
+# and pull the populated source into the build with add_subdirectory(),
+# if they are using the populated content in that way.
+function(FetchContent_GetProperties contentName)
+
+  string(TOLOWER ${contentName} lcName)
+
+  # Each keyword takes the name of the variable that should receive the
+  # corresponding property.
+  cmake_parse_arguments(ARG "" "SOURCE_DIR;BINARY_DIR;POPULATED" "" ${ARGN})
+
+  # When the caller did not ask for anything in particular, report all
+  # properties using the default <lcName>_XXX variable names.
+  if(NOT (ARG_SOURCE_DIR OR ARG_BINARY_DIR OR ARG_POPULATED))
+    set(ARG_SOURCE_DIR ${lcName}_SOURCE_DIR)
+    set(ARG_BINARY_DIR ${lcName}_BINARY_DIR)
+    set(ARG_POPULATED  ${lcName}_POPULATED)
+  endif()
+
+  set(propBase "_FetchContent_${lcName}")
+
+  # The directory variables are only set in the caller's scope when a
+  # value has actually been recorded for them.
+  if(ARG_SOURCE_DIR)
+    get_property(recordedSourceDir GLOBAL PROPERTY ${propBase}_sourceDir)
+    if(recordedSourceDir)
+      set(${ARG_SOURCE_DIR} ${recordedSourceDir} PARENT_SCOPE)
+    endif()
+  endif()
+
+  if(ARG_BINARY_DIR)
+    get_property(recordedBinaryDir GLOBAL PROPERTY ${propBase}_binaryDir)
+    if(recordedBinaryDir)
+      set(${ARG_BINARY_DIR} ${recordedBinaryDir} PARENT_SCOPE)
+    endif()
+  endif()
+
+  # The populated flag is always reported: it reflects whether the
+  # marker property has been defined, not what value it holds.
+  if(ARG_POPULATED)
+    get_property(isPopulated GLOBAL PROPERTY ${propBase}_populated DEFINED)
+    set(${ARG_POPULATED} ${isPopulated} PARENT_SCOPE)
+  endif()
+
+endfunction()
+
+
+#=======================================================================
+# Performing the population
+#=======================================================================
+
+# The value of contentName will always have been lowercased by the caller.
+# All other arguments are assumed to be options that are understood by
+# ExternalProject_Add(), except for QUIET and SUBBUILD_DIR.
+function(__FetchContent_directPopulate contentName)
+
+  # Populates <contentName> immediately: a CMakeLists.txt is generated in
+  # SUBBUILD_DIR from the CMakeLists.cmake.in template, and that project is
+  # then configured and built with execute_process(). On return,
+  # <contentName>_SOURCE_DIR and <contentName>_BINARY_DIR are set in the
+  # caller's scope. A FATAL_ERROR is raised if SUBBUILD_DIR, SOURCE_DIR or
+  # BINARY_DIR is missing, or if either sub-build step reports a failure.
+  #
+  # NOTE(review): the template is filled in by configure_file() from the
+  # variables of this function (e.g. ARG_EXTRA, see below), and the template
+  # itself is not part of this file. Do not rename locals here without
+  # checking the template.
+
+  set(options
+      QUIET
+  )
+  set(oneValueArgs
+      SUBBUILD_DIR
+      SOURCE_DIR
+      BINARY_DIR
+      # Prevent the following from being passed through. Listing them as
+      # keywords makes cmake_parse_arguments() capture them instead of
+      # leaving them in ARG_UNPARSED_ARGUMENTS.
+      # NOTE(review): as one-value keywords they consume a single value
+      # each; presumably extra words of a multi-word command would still
+      # end up in the unparsed arguments -- confirm.
+      CONFIGURE_COMMAND
+      BUILD_COMMAND
+      INSTALL_COMMAND
+      TEST_COMMAND
+  )
+  set(multiValueArgs "")
+
+  cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  # The three directories are mandatory at this level. Relative paths are
+  # resolved against CMAKE_CURRENT_BINARY_DIR.
+  if(NOT ARG_SUBBUILD_DIR)
+    message(FATAL_ERROR "Internal error: SUBBUILD_DIR not set")
+  elseif(NOT IS_ABSOLUTE "${ARG_SUBBUILD_DIR}")
+    set(ARG_SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SUBBUILD_DIR}")
+  endif()
+
+  if(NOT ARG_SOURCE_DIR)
+    message(FATAL_ERROR "Internal error: SOURCE_DIR not set")
+  elseif(NOT IS_ABSOLUTE "${ARG_SOURCE_DIR}")
+    set(ARG_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_SOURCE_DIR}")
+  endif()
+
+  if(NOT ARG_BINARY_DIR)
+    message(FATAL_ERROR "Internal error: BINARY_DIR not set")
+  elseif(NOT IS_ABSOLUTE "${ARG_BINARY_DIR}")
+    set(ARG_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/${ARG_BINARY_DIR}")
+  endif()
+
+  # Ensure the caller can know where to find the source and build directories
+  # with some convenient variables. Doing this here ensures the caller sees
+  # the correct result in the case where the default values are overridden by
+  # the content details set by the project.
+  set(${contentName}_SOURCE_DIR "${ARG_SOURCE_DIR}" PARENT_SCOPE)
+  set(${contentName}_BINARY_DIR "${ARG_BINARY_DIR}" PARENT_SCOPE)
+
+  # The unparsed arguments may contain spaces, so build up ARG_EXTRA
+  # in such a way that it correctly substitutes into the generated
+  # CMakeLists.txt file with each argument quoted.
+  # Each argument is wrapped in double quotes and appended after a single
+  # space; double quotes already present inside an argument are not escaped.
+  unset(ARG_EXTRA)
+  foreach(arg IN LISTS ARG_UNPARSED_ARGUMENTS)
+    set(ARG_EXTRA "${ARG_EXTRA} \"${arg}\"")
+  endforeach()
+
+  # Hide output if requested, but save it to a variable in case there's an
+  # error so we can show the output upon failure. When not quiet, don't
+  # capture the output to a variable because the user may want to see the
+  # output as it happens (e.g. progress during long downloads). Combine both
+  # stdout and stderr in the one capture variable so the output stays in order.
+  if (ARG_QUIET)
+    set(outputOptions
+        OUTPUT_VARIABLE capturedOutput
+        ERROR_VARIABLE  capturedOutput
+    )
+  else()
+    # Clear both so nothing is captured and nothing stale is printed in the
+    # failure branches further down.
+    set(capturedOutput)
+    set(outputOptions)
+    message(STATUS "Populating ${contentName}")
+  endif()
+
+  # Build the command line options that make the sub-build use the same
+  # generator, platform, toolset and make program as the current run.
+  if(CMAKE_GENERATOR)
+    set(generatorOpts "-G${CMAKE_GENERATOR}")
+    if(CMAKE_GENERATOR_PLATFORM)
+      list(APPEND generatorOpts "-A${CMAKE_GENERATOR_PLATFORM}")
+    endif()
+    if(CMAKE_GENERATOR_TOOLSET)
+      list(APPEND generatorOpts "-T${CMAKE_GENERATOR_TOOLSET}")
+    endif()
+
+    if(CMAKE_MAKE_PROGRAM)
+      list(APPEND generatorOpts "-DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}")
+    endif()
+
+  else()
+    # Likely we've been invoked via CMake's script mode where no
+    # generator is set (and hence CMAKE_MAKE_PROGRAM could not be
+    # trusted even if provided). We will have to rely on being
+    # able to find the default generator and build tool.
+    unset(generatorOpts)
+  endif()
+
+  # Create and build a separate CMake project to carry out the population.
+  # If we've already previously done these steps, they will not cause
+  # anything to be updated, so extra rebuilds of the project won't occur.
+  # Make sure to pass through CMAKE_MAKE_PROGRAM in case the main project
+  # has this set to something not findable on the PATH.
+  configure_file("${__FetchContent_privateDir}/CMakeLists.cmake.in"
+                 "${ARG_SUBBUILD_DIR}/CMakeLists.txt")
+  # Step 1: configure the generated project in place (source dir == build
+  # dir == ARG_SUBBUILD_DIR).
+  execute_process(
+    COMMAND ${CMAKE_COMMAND} ${generatorOpts} .
+    RESULT_VARIABLE result
+    ${outputOptions}
+    WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
+  )
+  # Any true-valued result (non-zero exit code or error string) is a failure.
+  # Output captured in quiet mode is printed before aborting.
+  if(result)
+    if(capturedOutput)
+      message("${capturedOutput}")
+    endif()
+    message(FATAL_ERROR "CMake step for ${contentName} failed: ${result}")
+  endif()
+  # Step 2: build the generated project, which carries out the population.
+  execute_process(
+    COMMAND ${CMAKE_COMMAND} --build .
+    RESULT_VARIABLE result
+    ${outputOptions}
+    WORKING_DIRECTORY "${ARG_SUBBUILD_DIR}"
+  )
+  if(result)
+    if(capturedOutput)
+      message("${capturedOutput}")
+    endif()
+    message(FATAL_ERROR "Build step for ${contentName} failed: ${result}")
+  endif()
+
+endfunction()
+
+
+option(FETCHCONTENT_FULLY_DISCONNECTED   "Disables all attempts to download or update content and assumes source dirs already exist")
+option(FETCHCONTENT_UPDATES_DISCONNECTED "Enables UPDATE_DISCONNECTED behavior for all content population")
+option(FETCHCONTENT_QUIET                "Enables QUIET option for all content population" ON)
+set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/_deps" CACHE PATH "Directory under which to collect all populated content")
+
+# Populate the specified content using details stored from
+# an earlier call to FetchContent_Declare().
+function(FetchContent_Populate contentName)
+
+  if(NOT contentName)
+    message(FATAL_ERROR "Empty contentName not allowed for FetchContent_Populate()")
+  endif()
+
+  string(TOLOWER ${contentName} contentNameLower)
+
+  if(ARGN)
+    # This is the direct population form with details fully specified
+    # as part of the call, so we already have everything we need
+    __FetchContent_directPopulate(
+      ${contentNameLower}
+      SUBBUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-subbuild"
+      SOURCE_DIR   "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-src"
+      BINARY_DIR   "${CMAKE_CURRENT_BINARY_DIR}/${contentNameLower}-build"
+      ${ARGN}  # Could override any of the above ..._DIR variables
+    )
+
+    # Pass source and binary dir variables back to the caller
+    set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
+    set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)
+
+    # Don't set global properties, or record that we did this population, since
+    # this was a direct call outside of the normal declared details form.
+    # We only want to save values in the global properties for content that
+    # honours the hierarchical details mechanism so that projects are not
+    # robbed of the ability to override details set in nested projects.
+    return()
+  endif()
+
+  # No details provided, so assume they were saved from an earlier call
+  # to FetchContent_Declare(). Do a check that we haven't already
+  # populated this content before in case the caller forgot to check.
+  FetchContent_GetProperties(${contentName})
+  if(${contentNameLower}_POPULATED)
+    message(FATAL_ERROR "Content ${contentName} already populated in ${${contentNameLower}_SOURCE_DIR}")
+  endif()
+
+  string(TOUPPER ${contentName} contentNameUpper)
+  set(FETCHCONTENT_SOURCE_DIR_${contentNameUpper}
+      "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}"
+      CACHE PATH "When not empty, overrides where to find pre-populated content for ${contentName}")
+
+  if(FETCHCONTENT_SOURCE_DIR_${contentNameUpper})
+    # The source directory has been explicitly provided in the cache,
+    # so no population is required
+    set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_SOURCE_DIR_${contentNameUpper}}")
+    set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")
+
+  elseif(FETCHCONTENT_FULLY_DISCONNECTED)
+    # Bypass population and assume source is already there from a previous run
+    set(${contentNameLower}_SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src")
+    set(${contentNameLower}_BINARY_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build")
+
+  else()
+    # Support both a global "disconnect all updates" and a per-content
+    # update test (either one being set disables updates for this content).
+    option(FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper}
+           "Enables UPDATE_DISCONNECTED behavior just for population of ${contentName}")
+    if(FETCHCONTENT_UPDATES_DISCONNECTED OR
+       FETCHCONTENT_UPDATES_DISCONNECTED_${contentNameUpper})
+      set(disconnectUpdates True)
+    else()
+      set(disconnectUpdates False)
+    endif()
+
+    if(FETCHCONTENT_QUIET)
+      set(quietFlag QUIET)
+    else()
+      unset(quietFlag)
+    endif()
+
+    __FetchContent_getSavedDetails(${contentName} contentDetails)
+    if("${contentDetails}" STREQUAL "")
+      message(FATAL_ERROR "No details have been set for content: ${contentName}")
+    endif()
+
+    __FetchContent_directPopulate(
+      ${contentNameLower}
+      ${quietFlag}
+      UPDATE_DISCONNECTED ${disconnectUpdates}
+      SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-subbuild"
+      SOURCE_DIR   "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-src"
+      BINARY_DIR   "${FETCHCONTENT_BASE_DIR}/${contentNameLower}-build"
+      # Put the saved details last so they can override any of
+      # the options we set above (this can include SOURCE_DIR or
+      # BINARY_DIR)
+      ${contentDetails}
+    )
+  endif()
+
+  __FetchContent_setPopulated(
+    ${contentName}
+    ${${contentNameLower}_SOURCE_DIR}
+    ${${contentNameLower}_BINARY_DIR}
+  )
+
+  # Pass variables back to the caller. The variables passed back here
+  # must match what FetchContent_GetProperties() sets when it is called
+  # with just the content name.
+  set(${contentNameLower}_SOURCE_DIR "${${contentNameLower}_SOURCE_DIR}" PARENT_SCOPE)
+  set(${contentNameLower}_BINARY_DIR "${${contentNameLower}_BINARY_DIR}" PARENT_SCOPE)
+  set(${contentNameLower}_POPULATED  True PARENT_SCOPE)
+
+endfunction()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in
new file mode 100644
index 0000000..9a7a771
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/FetchContent/CMakeLists.cmake.in
@@ -0,0 +1,21 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+cmake_minimum_required(VERSION ${CMAKE_VERSION})
+
+# We name the project and the target for the ExternalProject_Add() call
+# to something that will highlight to the user what we are working on if
+# something goes wrong and an error message is produced.
+
+project(${contentName}-populate NONE)
+
+include(ExternalProject)
+ExternalProject_Add(${contentName}-populate
+                    ${ARG_EXTRA}
+                    SOURCE_DIR          "${ARG_SOURCE_DIR}"
+                    BINARY_DIR          "${ARG_BINARY_DIR}"
+                    CONFIGURE_COMMAND   ""
+                    BUILD_COMMAND       ""
+                    INSTALL_COMMAND     ""
+                    TEST_COMMAND        ""
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md
new file mode 100644
index 0000000..c8d275f
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/Modules/README.md
@@ -0,0 +1,5 @@
+
+## FetchContent
+
+`FetchContent.cmake` and `FetchContent/CMakeLists.cmake.in`
+are copied from `cmake/3.11.0/share/cmake-3.11/Modules`.
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py
new file mode 100644
index 0000000..f3ca874
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/cmake_extension.py
@@ -0,0 +1,120 @@
+# Copyright (c)  2021  Xiaomi Corporation (author: Fangjun Kuang)
+
+import glob
+import os
+import platform
+import shutil
+import sys
+from pathlib import Path
+
+import setuptools
+from setuptools.command.build_ext import build_ext
+
+
+def is_for_pypi():
+    # True whenever KALDI_NATIVE_FBANK_IS_FOR_PYPI is set, whatever its value.
+    return "KALDI_NATIVE_FBANK_IS_FOR_PYPI" in os.environ
+
+
+def is_macos():
+    return platform.system() == "Darwin"
+
+
+def is_windows():
+    return platform.system() == "Windows"
+
+
+try:
+    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
+
+    class bdist_wheel(_bdist_wheel):
+        def finalize_options(self):
+            _bdist_wheel.finalize_options(self)
+            # PyPI builds on non-macOS hosts are marked pure, so the wheel is
+            # named like <dist>-xxx-pyxx-none-any.whl
+            if is_for_pypi() and not is_macos():
+                self.root_is_pure = True
+            else:
+                # Otherwise the wheel gets a platform tag, e.g. a name ending
+                # with -linux_x86_64.whl
+                self.root_is_pure = False
+
+
+except ImportError:
+    bdist_wheel = None  # the wheel package could not be imported
+
+
+def cmake_extension(name, *args, **kwargs) -> setuptools.Extension:
+    # The extension is created with an empty source list and language "c++".
+    kwargs["language"] = "c++"
+    return setuptools.Extension(name, [], *args, **kwargs)
+
+
+class BuildExtension(build_ext):
+    """build_ext command that configures, builds and installs through CMake."""
+    def build_extension(self, ext: setuptools.extension.Extension):
+        """Build with CMake, installing under build_lib/kaldi_native_fbank."""
+        # build/temp.linux-x86_64-3.8
+        os.makedirs(self.build_temp, exist_ok=True)
+        # build/lib.linux-x86_64-3.8
+        os.makedirs(self.build_lib, exist_ok=True)
+
+        install_dir = Path(self.build_lib).resolve() / "kaldi_native_fbank"
+        kaldi_native_fbank_dir = Path(__file__).parent.parent.resolve()
+
+        cmake_args = os.environ.get("KALDI_NATIVE_FBANK_CMAKE_ARGS", "")
+        make_args = os.environ.get("KALDI_NATIVE_FBANK_MAKE_ARGS", "")
+        system_make_args = os.environ.get("MAKEFLAGS", "")
+
+        if cmake_args == "":
+            cmake_args = "-DCMAKE_BUILD_TYPE=Release"
+
+        extra_cmake_args = f" -DCMAKE_INSTALL_PREFIX={install_dir} "
+        extra_cmake_args += " -DKALDI_NATIVE_FBANK_BUILD_TESTS=OFF "
+
+        if "PYTHON_EXECUTABLE" not in cmake_args:
+            print(f"Setting PYTHON_EXECUTABLE to {sys.executable}")
+            cmake_args += f" -DPYTHON_EXECUTABLE={sys.executable}"
+
+        cmake_args += extra_cmake_args
+
+        if is_windows():
+            build_cmd = f"""
+                cmake {cmake_args} -B {self.build_temp} -S {kaldi_native_fbank_dir}
+                cmake --build {self.build_temp} --target install --config Release -- -m
+            """
+            print(f"build command is:\n{build_cmd}")
+            ret = os.system(
+                f"cmake {cmake_args} -B {self.build_temp} -S {kaldi_native_fbank_dir}"
+            )
+            if ret != 0:
+                raise Exception("Failed to configure kaldi_native_fbank")
+
+            ret = os.system(
+                f"cmake --build {self.build_temp} --target install --config Release -- -m"
+            )
+            if ret != 0:
+                raise Exception("Failed to install kaldi_native_fbank")
+        else:
+            if make_args == "" and system_make_args == "":
+                print("For fast compilation, run:")
+                print(
+                    'export KALDI_NATIVE_FBANK_MAKE_ARGS="-j"; python setup.py install'
+                )
+
+            # "set -e" stops at the first failing step instead of carrying on.
+            build_cmd = f"""
+                set -e
+                cd {self.build_temp}
+                cmake {cmake_args} {kaldi_native_fbank_dir}
+                make {make_args} install
+            """
+            print(f"build command is:\n{build_cmd}")
+
+            ret = os.system(build_cmd)
+            if ret != 0:
+                raise Exception(
+                    "\nBuild kaldi-native-fbank failed. Please check the error message.\n"
+                    "You can ask for help by creating an issue on GitHub.\n"
+                    "\nClick:\n\thttps://github.com/csukuangfj/kaldi-native-fbank/issues/new\n"  # noqa
+                )
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake
new file mode 100644
index 0000000..a732dc7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/googletest.cmake
@@ -0,0 +1,57 @@
+# Fetches googletest 1.10.0 (FetchContent is vendored in Modules/ for CMake < 3.11).
+function(download_googltest)
+  if(CMAKE_VERSION VERSION_LESS 3.11)
+    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
+  endif()
+  include(FetchContent)
+
+  set(googletest_URL  "https://github.com/google/googletest/archive/release-1.10.0.tar.gz")
+  set(googletest_HASH "SHA256=9dc9157a9a1551ec7a7e43daea9a694a0bb5fb8bec81235d8a1e6ef64c716dcb")
+
+  set(BUILD_GMOCK ON CACHE BOOL "" FORCE)
+  set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
+  set(gtest_disable_pthreads ON CACHE BOOL "" FORCE)
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+
+  FetchContent_Declare(googletest
+    URL               ${googletest_URL}
+    URL_HASH          ${googletest_HASH}
+  )
+
+  FetchContent_GetProperties(googletest)
+  if(NOT googletest_POPULATED)
+    message(STATUS "Downloading googletest from ${googletest_URL}")
+    FetchContent_Populate(googletest)
+  endif()
+  message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}")
+  message(STATUS "googletest's binary dir is ${googletest_BINARY_DIR}")
+
+  if(APPLE)
+    set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS
+  endif()
+  #[==[
+  -- Generating done
+    Policy CMP0042 is not set: MACOSX_RPATH is enabled by default.  Run "cmake
+    --help-policy CMP0042" for policy details.  Use the cmake_policy command to
+    set the policy and suppress this warning.
+
+    MACOSX_RPATH is not specified for the following targets:
+
+      gmock
+      gmock_main
+      gtest
+      gtest_main
+
+  This warning is for project developers.  Use -Wno-dev to suppress it.
+  ]==]
+
+  add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
+
+  target_include_directories(gtest
+    INTERFACE
+      ${googletest_SOURCE_DIR}/googletest/include
+      ${googletest_SOURCE_DIR}/googlemock/include
+  )
+endfunction()
+
+download_googltest()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake
new file mode 100644
index 0000000..464f399
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/cmake/pybind11.cmake
@@ -0,0 +1,35 @@
+# Fetches pybind11 2.9.2 (FetchContent is vendored in Modules/ for CMake < 3.11).
+function(download_pybind11)
+  if(CMAKE_VERSION VERSION_LESS 3.11)
+    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Modules")
+  endif()
+  include(FetchContent)
+
+  set(pybind11_URL  "https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz")
+  set(pybind11_HASH "SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1")
+
+  # If you don't have access to the Internet, please download it to your
+  # local drive and modify the following line according to your needs.
+  if(EXISTS "/star-fj/fangjun/download/github/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL  "file:///star-fj/fangjun/download/github/pybind11-2.9.2.tar.gz")
+  elseif(EXISTS "/Users/fangjun/Downloads/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL  "file:///Users/fangjun/Downloads/pybind11-2.9.2.tar.gz")
+  elseif(EXISTS "/tmp/pybind11-2.9.2.tar.gz")
+    set(pybind11_URL  "file:///tmp/pybind11-2.9.2.tar.gz")
+  endif()
+
+  FetchContent_Declare(pybind11
+    URL               ${pybind11_URL}
+    URL_HASH          ${pybind11_HASH}
+  )
+
+  FetchContent_GetProperties(pybind11)
+  if(NOT pybind11_POPULATED)
+    message(STATUS "Downloading pybind11 from ${pybind11_URL}")
+    FetchContent_Populate(pybind11)
+  endif()
+  message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}")
+  add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL)
+endfunction()
+
+download_pybind11()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt
new file mode 100644
index 0000000..2037626
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_subdirectory(csrc)
+
+if(NOT KALDI_NATIVE_FBANK_BUILD_PYTHON)
+  message(STATUS "Disable building Python")
+else()
+  message(STATUS "Building Python")
+  add_subdirectory(python)
+endif()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt
new file mode 100644
index 0000000..bd61361
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_library(csrc STATIC  # static library built from the sources listed below
+        feature-fbank.cc
+        feature-functions.cc
+        feature-window.cc
+        fftsg.c
+        mel-computations.cc
+        online-feature.cc
+        rfft.cc)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak
new file mode 100644
index 0000000..6eb693d
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/CMakeLists.txt.bak
@@ -0,0 +1,93 @@
+
+include_directories(${PROJECT_SOURCE_DIR})
+set(sources
+  feature-fbank.cc
+  feature-functions.cc
+  feature-window.cc
+  fftsg.c
+  mel-computations.cc
+  online-feature.cc
+  rfft.cc
+)
+
+if(KALDI_NATIVE_FBANK_ENABLE_CHECK)
+  list(APPEND sources log.cc)
+endif()
+
+add_library(kaldi-native-fbank-core ${sources})
+if(KALDI_NATIVE_FBANK_ENABLE_CHECK)
+  target_compile_definitions(kaldi-native-fbank-core PUBLIC KNF_ENABLE_CHECK=1)
+
+  if(KNF_HAVE_EXECINFO_H)
+    target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_EXECINFO_H=1)
+  endif()
+
+  if(KNF_HAVE_CXXABI_H)
+    target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_CXXABI_H=1)
+  endif()
+endif()
+
+# We are using std::call_once() in log.h,which requires us to link with -pthread
+if(NOT WIN32 AND KALDI_NATIVE_FBANK_ENABLE_CHECK)
+  target_link_libraries(kaldi-native-fbank-core -pthread)
+endif()
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  add_executable(test-online-fbank test-online-fbank.cc)
+  target_link_libraries(test-online-fbank kaldi-native-fbank-core)
+endif()
+
+# Builds one gtest executable from `source` and registers it as Test.<name>.
+function(kaldi_native_fbank_add_test source)
+  # The target is named after the source file, minus directory and extension.
+  get_filename_component(test_target ${source} NAME_WE)
+  add_executable(${test_target} "${source}")
+  target_link_libraries(${test_target}
+    PRIVATE kaldi-native-fbank-core gtest gtest_main
+  )
+
+  # The registered test simply runs the executable built above.
+  add_test(
+    NAME "Test.${test_target}"
+    COMMAND $<TARGET_FILE:${test_target}>
+  )
+endfunction()
+
+# please sort the source files alphabetically
+set(test_srcs
+  # test-online-feature.cc
+  test-log.cc
+  test-rfft.cc
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  foreach(source IN LISTS test_srcs)
+    kaldi_native_fbank_add_test(${source})
+  endforeach()
+endif()
+
+install(TARGETS kaldi-native-fbank-core
+  DESTINATION lib
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)
+  install(TARGETS test-online-fbank
+    DESTINATION bin
+  )
+endif()
+
+# Stage the headers in the build tree; file(MAKE_DIRECTORY) takes only paths.
+file(MAKE_DIRECTORY
+  ${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc
+)
+file(GLOB_RECURSE all_headers *.h)
+
+file(COPY
+  ${all_headers}
+  DESTINATION
+    ${PROJECT_BINARY_DIR}/include/kaldi-native-fbank/csrc
+)
+
+install(FILES ${all_headers}
+  DESTINATION include/kaldi-native-fbank/csrc
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc
new file mode 100644
index 0000000..068f495
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.cc
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
+//
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-functions.h"
+
+namespace knf {
+
+// Replaces each of the first n elements of in_out with its square root.
+static void Sqrt(float *in_out, int32_t n) {
+  float *const end = in_out + n;
+  for (float *p = in_out; p != end; ++p) *p = std::sqrt(*p);
+}
+
+// Streams the text produced by FbankOptions::ToString().
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
+  return os << opts.ToString();
+}
+
+FbankComputer::FbankComputer(const FbankOptions &opts)
+    : opts_(opts),
+      log_energy_floor_(0.0f),  // replaced below when energy_floor > 0
+      rfft_(opts.frame_opts.PaddedWindowSize()) {
+  if (opts.energy_floor > 0.0f) {
+    log_energy_floor_ = std::log(opts.energy_floor);
+  }
+  // Create and cache the mel banks for VTLN warp factor 1.0 up front.
+  GetMelBanks(1.0f);
+}
+
+FbankComputer::~FbankComputer() {
+  // Free every MelBanks object that GetMelBanks() allocated and cached.
+  for (auto &entry : mel_banks_) delete entry.second;
+}
+
+const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
+  // Reuse the filterbank cached for this warp factor when there is one.
+  auto cached = mel_banks_.find(vtln_warp);
+  if (cached != mel_banks_.end()) {
+    return cached->second;
+  }
+
+  // Otherwise build it and remember it under this warp factor; the
+  // destructor deletes every entry stored in mel_banks_.
+  MelBanks *fresh = new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
+  mel_banks_[vtln_warp] = fresh;
+  return fresh;
+}
+
+void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
+                            std::vector<float> *signal_frame, float *feature) {
+  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));  // cached per warp
+
+  KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());
+
+  // Compute energy after window function (not the raw one).
+  if (opts_.use_energy && !opts_.raw_energy) {
+    signal_raw_log_energy = std::log(
+        std::max<float>(InnerProduct(signal_frame->data(), signal_frame->data(),
+                                     signal_frame->size()),
+                        std::numeric_limits<float>::epsilon()));
+  }
+  rfft_.Compute(signal_frame->data());  // signal_frame is modified in-place
+  ComputePowerSpectrum(signal_frame);   // also overwrites signal_frame
+
+  // Use magnitude instead of power if requested.
+  if (!opts_.use_power) {
+    Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
+  }
+
+  int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);
+
+  // Holds opts_.mel_opts.num_bins values; skips slot 0 when energy goes first.
+  float *mel_energies = feature + mel_offset;
+
+  // Sum with mel filter banks over the power spectrum
+  mel_banks.Compute(signal_frame->data(), mel_energies);
+
+  if (opts_.use_log_fbank) {
+    // Avoid log of zero (which should be prevented anyway by dithering).
+    for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
+      auto t = std::max(mel_energies[i], std::numeric_limits<float>::epsilon());
+      mel_energies[i] = std::log(t);
+    }
+  }
+
+  // Copy energy as first value (or the last, if htk_compat == true).
+  if (opts_.use_energy) {
+    if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
+      signal_raw_log_energy = log_energy_floor_;
+    }
+    int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
+    feature[energy_index] = signal_raw_log_energy;
+  }
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h
new file mode 100644
index 0000000..0786aad
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-fbank.h
@@ -0,0 +1,134 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-fbank.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+struct FbankOptions {
+  FrameExtractionOptions frame_opts;
+  MelBanksOptions mel_opts;
+  // append an extra dimension with energy to the filter banks
+  bool use_energy = false;
+  float energy_floor = 0.0f;  // active iff use_energy==true
+
+  // If true, compute log_energy before preemphasis and windowing
+  // If false, compute log_energy after preemphasis and windowing
+  bool raw_energy = true;  // active iff use_energy==true
+
+  // If true, put energy last (if using energy)
+  // If false, put energy first
+  bool htk_compat = false;  // active iff use_energy==true
+
+  // if true (default), produce log-filterbank, else linear
+  bool use_log_fbank = true;
+
+  // if true (default), use power in filterbank
+  // analysis, else magnitude.
+  bool use_power = true;
+
+  FbankOptions() { mel_opts.num_bins = 23; }  // 23 mel bins by default
+
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "frame_opts: \n";
+    os << frame_opts << "\n";
+    os << "\n";
+
+    os << "mel_opts: \n";
+    os << mel_opts << "\n";
+
+    os << "use_energy: " << use_energy << "\n";
+    os << "energy_floor: " << energy_floor << "\n";
+    os << "raw_energy: " << raw_energy << "\n";
+    os << "htk_compat: " << htk_compat << "\n";
+    os << "use_log_fbank: " << use_log_fbank << "\n";
+    os << "use_power: " << use_power << "\n";
+    return os.str();
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);
+
+class FbankComputer {
+ public:
+  using Options = FbankOptions;
+
+  // NOTE(review): copying is not explicitly disabled although the destructor
+  // deletes the mel_banks_ entries; confirm instances are never copied.
+  explicit FbankComputer(const FbankOptions &opts);
+  ~FbankComputer();
+
+  int32_t Dim() const {
+    return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
+  }
+
+  // if true, compute log_energy_pre_window but after dithering and dc removal
+  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }
+
+  const FrameExtractionOptions &GetFrameOptions() const {
+    return opts_.frame_opts;
+  }
+
+  const FbankOptions &GetOptions() const { return opts_; }
+
+  /**
+     Computes one frame of features from one frame of signal.
+
+     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
+         prior to windowing and pre-emphasis, or
+         log(numeric_limits<float>::min()), whichever is greater.  Must be
+         ignored by this function if this class returns false from
+         this->NeedRawLogEnergy().
+     @param [in] vtln_warp  The VTLN warping factor that the user wants
+         to be applied when computing features for this utterance.  Will
+         normally be 1.0, meaning no warping is to be done.  The value will
+         be ignored for feature types that don't support VTLN, such as
+         spectrogram features.
+     @param [in] signal_frame  One frame of the signal, as extracted using the
+         function ExtractWindow() with the options returned by
+         this->GetFrameOptions().  Used as a workspace, hence non-const.
+     @param [out] feature  Pointer to a vector of size this->Dim(), to which
+         the computed feature will be written. It should be pre-allocated.
+  */
+  void Compute(float signal_raw_log_energy, float vtln_warp,
+               std::vector<float> *signal_frame, float *feature);
+
+ private:
+  const MelBanks *GetMelBanks(float vtln_warp);
+
+  FbankOptions opts_;
+  float log_energy_floor_;
+  std::map<float, MelBanks *> mel_banks_;  // float is VTLN coefficient.
+  Rfft rfft_;
+};
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc
new file mode 100644
index 0000000..00ae4c7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.cc
@@ -0,0 +1,49 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-functions.cc
+
+#include "kaldi-native-fbank/csrc/feature-functions.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace knf {
+
+void ComputePowerSpectrum(std::vector<float> *complex_fft) {
+  // Input layout: [real0, realN/2, real1, im1, real2, im2, ...], i.e. the
+  // first half of the complex spectrum; it is converted in place.
+  int32_t dim = complex_fft->size();
+  if (dim < 2) return;  // p[0] and p[1] below need at least two elements
+
+  float *p = complex_fft->data();
+  int32_t half_dim = dim / 2;
+  float first_energy = p[0] * p[0];
+  float last_energy = p[1] * p[1];  // saved before the loop overwrites p[1]
+
+  // Writing p[i] is safe: later iterations only read indices >= 2 * (i + 1).
+  for (int32_t i = 1; i < half_dim; ++i) {
+    float real = p[i * 2];
+    float im = p[i * 2 + 1];
+    p[i] = real * real + im * im;
+  }
+  p[0] = first_energy;
+  p[half_dim] = last_energy;  // zero if the signal was sensibly bandlimited
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h
new file mode 100644
index 0000000..b221622
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-functions.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/feature-functions.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
+
+#include <vector>
+namespace knf {
+
+// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
+// functions in csrc/rfft.h) and converts it, in place, into
+// a power spectrum.  If the complex FFT is a vector of size n (representing
+// half of the complex FFT of a real signal of size n, as described there),
+// this function stores in its first (n/2) + 1 elements the
+// energies of the FFT bins from zero to the Nyquist frequency.  Contents of the
+// remaining (n/2) - 1 elements are undefined at output.
+
+void ComputePowerSpectrum(std::vector<float> *complex_fft);
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc
new file mode 100644
index 0000000..dc189a6
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.cc
@@ -0,0 +1,247 @@
+// kaldi-native-fbank/csrc/feature-window.cc
+//
+// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+
+// This file is copied/modified from kaldi/src/feat/feature-window.cc
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#ifndef M_2PI
+#define M_2PI 6.283185307179586476925286766559005
+#endif
+
+namespace knf {
+
+// Streams the text produced by FrameExtractionOptions::ToString().
+std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
+  return os << opts.ToString();
+}
+
+FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
+    : window_(opts.WindowSize()) {
+  // Precompute one coefficient per sample for the configured window type.
+  const int32_t frame_length = opts.WindowSize();
+  KNF_CHECK_GT(frame_length, 0);
+  const std::string &type = opts.window_type;
+  const double step = M_2PI / (frame_length - 1);  // phase advance per sample
+  float *coeffs = window_.data();
+  for (int32_t n = 0; n < frame_length; ++n) {
+    const double x = static_cast<double>(n);
+    if (type == "hanning") {
+      coeffs[n] = 0.5 - 0.5 * cos(step * x);
+    } else if (type == "sine") {
+      // When comparing against Wikipedia's formula, note that
+      // 0.5 * step == M_PI / (frame_length - 1).
+      coeffs[n] = sin(0.5 * step * x);
+    } else if (type == "hamming") {
+      coeffs[n] = 0.54 - 0.46 * cos(step * x);
+    } else if (type == "povey") {
+      // Like hamming, but goes to zero at the edges.
+      coeffs[n] = pow(0.5 - 0.5 * cos(step * x), 0.85);
+    } else if (type == "rectangular") {
+      coeffs[n] = 1.0;
+    } else if (type == "blackman") {
+      coeffs[n] = opts.blackman_coeff - 0.5 * cos(step * x) +
+                  (0.5 - opts.blackman_coeff) * cos(2 * step * x);
+    } else {
+      KNF_LOG(FATAL) << "Invalid window type " << type;
+    }
+  }
+}
+
+void FeatureWindowFunction::Apply(float *wave) const {
+  // Multiply each sample in place by its precomputed window coefficient.
+  float *sample = wave;
+  for (float coeff : window_) {
+    *sample++ *= coeff;
+  }
+}
+
+int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
+  const int64_t shift = opts.WindowShift();
+  if (opts.snip_edges) {
+    // Frame k simply starts k shifts after sample zero.
+    return frame * shift;
+  }
+  // Otherwise frame k is centred on shift * k + shift / 2; step back by half
+  // a window from that midpoint (the result may be negative).
+  const int64_t centre = shift * frame + shift / 2;
+  return centre - opts.WindowSize() / 2;
+}
+
+int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
+                  bool flush /*= true*/) {
+  // Counts the frames obtainable from 'num_samples' samples under 'opts'.
+  // 'flush' can only change the answer when opts.snip_edges is false.
+
+  const int64_t shift = opts.WindowShift();
+  const int64_t length = opts.WindowSize();
+
+  if (opts.snip_edges) {
+    // --snip-edges=true (the default) counts frames the HTK way: frame 0
+    // starts at sample zero and every frame must fit entirely inside the
+    // waveform.
+    if (num_samples < length) return 0;
+
+    // 'num_samples - length' is the room available for sliding the frame and
+    // 'shift' is the size of one slide, so the quotient (integer division
+    // rounds down) is how many further frames follow the first.
+    return 1 + (num_samples - length) / shift;
+  }
+
+  // --snip-edges=false: the frame count is (file-length / frame-shift)
+  // rounded to the nearest integer.  That makes it an obvious, predictable
+  // function of the shift and the signal length, which simplifies many
+  // segmentation-related questions.
+  //
+  // Integer division in C++ rounds toward zero, so half the frame shift is
+  // added before dividing to get rounding to the closest integer.
+  int32_t count = (num_samples + shift / 2) / shift;
+
+  // The caller asserts that no more samples are coming: the count is final.
+  if (flush) return count;
+
+  // note: 'end' always means the last plus one, i.e. one past the last.
+  int64_t last_end = FirstSampleOfFrame(count - 1, opts) + length;
+
+  // More data may still arrive, so frames that would extend past the end of
+  // the signal cannot be output yet; drop them from the tail one by one.
+  // (Written for clarity rather than efficiency.)
+  for (; count > 0 && last_end > num_samples; --count) {
+    last_end -= shift;
+  }
+
+  return count;
+}
+
+void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
+                   int32_t f, const FrameExtractionOptions &opts,
+                   const FeatureWindowFunction &window_function,
+                   std::vector<float> *window,
+                   float *log_energy_pre_window /*= nullptr*/) {
+  KNF_CHECK(sample_offset >= 0 && wave.size() != 0);
+  // 'wave' holds samples [sample_offset, sample_offset + wave.size()).
+  int32_t frame_length = opts.WindowSize();
+  int32_t frame_length_padded = opts.PaddedWindowSize();
+
+  int64_t num_samples = sample_offset + wave.size();
+  int64_t start_sample = FirstSampleOfFrame(f, opts);
+  int64_t end_sample = start_sample + frame_length;
+
+  if (opts.snip_edges) {
+    KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
+  } else {
+    KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
+  }
+
+  // Size the output and zero the padded tail so a reused buffer is clean.
+  window->resize(frame_length_padded);
+  std::fill(window->begin() + frame_length, window->end(), 0.0f);
+
+  // wave_start and wave_end are start and end indexes into 'wave', for the
+  // piece of wave that we're trying to extract.
+  int32_t wave_start = int32_t(start_sample - sample_offset);
+  int32_t wave_end = wave_start + frame_length;
+
+  if (wave_start >= 0 && static_cast<size_t>(wave_end) <= wave.size()) {
+    // the normal case-- no edge effects to consider.
+    std::copy(wave.begin() + wave_start,
+              wave.begin() + wave_start + frame_length, window->data());
+  } else {
+    // Deal with any end effects by reflection, if needed.  This code will only
+    // be reached for about two frames per utterance, so we don't concern
+    // ourselves excessively with efficiency.
+    int32_t wave_dim = wave.size();
+    for (int32_t s = 0; s < frame_length; ++s) {
+      int32_t s_in_wave = s + wave_start;
+      while (s_in_wave < 0 || s_in_wave >= wave_dim) {
+        // reflect around the beginning or end of the wave.
+        // e.g. -1 -> 0, -2 -> 1.
+        // dim -> dim - 1, dim + 1 -> dim - 2.
+        // the code supports repeated reflections, although this
+        // would only be needed in pathological cases.
+        if (s_in_wave < 0)
+          s_in_wave = -s_in_wave - 1;
+        else
+          s_in_wave = 2 * wave_dim - 1 - s_in_wave;
+      }
+      (*window)[s] = wave[s_in_wave];
+    }
+  }
+
+  ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
+}
+
+static void RemoveDcOffset(float *d, int32_t n) {
+  // Sum in single precision, front to back, to obtain the frame mean.
+  float total = 0;
+  for (const float *it = d; it != d + n; ++it) {
+    total += *it;
+  }
+  // Subtract that mean from every sample, in place.
+  const float offset = total / n;
+  for (float *it = d; it != d + n; ++it) {
+    *it -= offset;
+  }
+}
+
+float InnerProduct(const float *a, const float *b, int32_t n) {
+  float acc = 0;  // single-precision accumulator, summed in index order
+  for (const float *const a_end = a + n; a != a_end; ++a, ++b) {
+    acc += *a * *b;
+  }
+  return acc;
+}
+
+static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
+  if (preemph_coeff == 0.0) return;  // zero coefficient: frame is unchanged
+  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);
+  // Walk from the last sample to the first so each update still sees the
+  // not-yet-modified predecessor: d[i] -= coeff * d[i - 1].
+  int32_t i = n - 1;
+  while (i > 0) {
+    d[i] -= preemph_coeff * d[i - 1];
+    --i;
+  }
+  d[0] -= preemph_coeff * d[0];  // sample 0 has no predecessor; uses itself
+}
+
+void ProcessWindow(const FrameExtractionOptions &opts,
+                   const FeatureWindowFunction &window_function, float *window,
+                   float *log_energy_pre_window /*= nullptr*/) {
+  // Runs, in place on the first opts.WindowSize() samples of 'window':
+  // DC-offset removal, optional raw log-energy measurement, pre-emphasis,
+  // and finally multiplication by the window function.
+  int32_t frame_length = opts.WindowSize();
+
+  // NOTE: dithering is not implemented here. opts.dither is never used, so
+  // every value behaves like dither == 0. The Kaldi reference adds
+  // RandGauss() * opts.dither to each sample at this point, before DC
+  // removal; see
+  // https://github.com/kaldi-asr/kaldi/blob/master/src/feat/feature-window.cc
+  // TODO: port that step if dithered features are ever required.
+
+  if (opts.remove_dc_offset) {
+    RemoveDcOffset(window, frame_length);
+  }
+
+  if (log_energy_pre_window != nullptr) {
+    // Floor at epsilon so the log of an all-zero frame stays finite.
+    float energy = std::max<float>(InnerProduct(window, window, frame_length),
+                                   std::numeric_limits<float>::epsilon());
+    *log_energy_pre_window = std::log(energy);
+  }
+
+  // Skip the call entirely when pre-emphasis is disabled.
+  if (opts.preemph_coeff != 0.0) {
+    Preemphasize(window, frame_length, opts.preemph_coeff);
+  }
+
+  window_function.Apply(window);
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h
new file mode 100644
index 0000000..32e8e2a
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/feature-window.h
@@ -0,0 +1,178 @@
+// kaldi-native-fbank/csrc/feature-window.h
+//
+// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+
+// This file is copied/modified from kaldi/src/feat/feature-window.h
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
+#define KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
+  // adapted from kaldi/src/base/kaldi-math.cc
+  KNF_CHECK_GT(n, 0);
+  // Smear the highest set bit of (n - 1) into every lower position, then
+  // add one: the result is the smallest power of two that is >= n.
+  int32_t v = n - 1;
+  for (int32_t shift = 1; shift <= 16; shift <<= 1) {
+    v |= v >> shift;
+  }
+  return v + 1;
+}
+
+struct FrameExtractionOptions {
+  float samp_freq = 16000;        // Samples per second.
+  float frame_shift_ms = 10.0f;   // Hop between frames, in milliseconds.
+  float frame_length_ms = 25.0f;  // Frame duration, in milliseconds.
+  float dither = 1.0f;            // Amount of dithering; 0.0 means none.
+  float preemph_coeff = 0.97f;    // Pre-emphasis coefficient.
+  bool remove_dc_offset = true;   // Subtract the frame mean before the FFT.
+  std::string window_type = "povey";  // Name of the analysis window.
+  // window_type may be "hamming", "rectangular", "povey", "hanning", "sine"
+  // or "blackman".  "povey" is similar to Hamming but goes to zero at the
+  // edges: pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85).  blackman_coeff below is
+  // only used by the "blackman" window.
+  bool round_to_power_of_two = true;
+  float blackman_coeff = 0.42f;
+  bool snip_edges = true;
+  // The allow_downsample / allow_upsample options are not part of this
+  // struct.
+
+  // Used for streaming feature extraction: the number of feature frames
+  // to keep in the recycling vector.
+  // -1 means every frame is kept.
+  int32_t max_feature_vectors = -1;
+
+  // Frame shift in samples (the float product is truncated).
+  int32_t WindowShift() const {
+    return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
+  }
+  // Frame length in samples (the float product is truncated).
+  int32_t WindowSize() const {
+    return static_cast<int32_t>(samp_freq * 0.001f * frame_length_ms);
+  }
+  // WindowSize(), rounded up to a power of two when that option is set.
+  int32_t PaddedWindowSize() const {
+    const int32_t size = WindowSize();
+    return round_to_power_of_two ? RoundUpToNearestPowerOfTwo(size) : size;
+  }
+  std::string ToString() const {
+    // One "name: value" line per option, in declaration order.
+    std::ostringstream os;
+    os << "samp_freq: " << samp_freq << "\n"
+       << "frame_shift_ms: " << frame_shift_ms << "\n"
+       << "frame_length_ms: " << frame_length_ms << "\n"
+       << "dither: " << dither << "\n"
+       << "preemph_coeff: " << preemph_coeff << "\n"
+       << "remove_dc_offset: " << remove_dc_offset << "\n"
+       << "window_type: " << window_type << "\n"
+       << "round_to_power_of_two: " << round_to_power_of_two << "\n"
+       << "blackman_coeff: " << blackman_coeff << "\n"
+       << "snip_edges: " << snip_edges << "\n"
+       << "max_feature_vectors: " << max_feature_vectors << "\n";
+    return os.str();
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);
+
+class FeatureWindowFunction {
+ public:
+  FeatureWindowFunction() = default;  // leaves window_ empty
+  explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
+  /**
+   * @param wave Pointer to a 1-D array of shape [window_size].
+   *             It is modified in-place: wave[i] = wave[i] * window_[i].
+   * @note  window_size is window_.size(); nothing is done if it is zero.
+   */
+  void Apply(float *wave) const;
+
+ private:
+  std::vector<float> window_;  // window coefficients; size opts.WindowSize()
+};
+
+int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);
+
+/**
+   This function returns the number of frames that we can extract from a wave
+   file with the given number of samples in it (assumed to have the same
+   sampling rate as specified in 'opts').
+
+      @param [in] num_samples  The number of samples in the wave file.
+      @param [in] opts     The frame-extraction options class
+
+      @param [in] flush   True if we are asserting that this number of samples
+   is 'all there is', false if we are expecting more data to possibly come in.
+   This only makes a difference to the answer
+   if opts.snip_edges == false.  For offline feature extraction you always want
+   flush == true.  In an online-decoding context, once you know (or decide) that
+   no more data is coming in, you'd call it with flush == true at the end to
+   flush out any remaining data.
+*/
+int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
+                  bool flush = true);
+
+/*
+  ExtractWindow() extracts a windowed frame of waveform (possibly with a
+  power-of-two, padded size, depending on the config), including all the
+  processing done by ProcessWindow().
+
+  @param [in] sample_offset  If 'wave' is not the entire waveform, but
+                   part of it to the left has been discarded, then the
+                   number of samples prior to 'wave' that we have
+                   already discarded.  Set this to zero if you are
+                   processing the entire waveform in one piece, or
+                   if you get 'no matching function' compilation
+                   errors when updating the code.
+  @param [in] wave  The waveform
+  @param [in] f     The frame index to be extracted, with
+                    0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
+  @param [in] opts  The options class to be used
+  @param [in] window_function  The windowing function, as derived from the
+                    options class.
+  @param [out] window  The windowed, possibly-padded waveform to be
+                     extracted.  Will be resized as needed.
+  @param [out] log_energy_pre_window  If non-NULL, the log-energy of
+                   the signal prior to pre-emphasis and multiplying by
+                   the windowing function will be written to here.
+*/
+void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
+                   int32_t f, const FrameExtractionOptions &opts,
+                   const FeatureWindowFunction &window_function,
+                   std::vector<float> *window,
+                   float *log_energy_pre_window = nullptr);
+
+/**
+  This function does all the windowing steps after actually
+  extracting the windowed signal: depending on the
+  configuration, it does dc offset removal, preemphasis, and
+  multiplication by the windowing function (dithering is not yet applied).
+   @param [in] opts  The options class to be used
+   @param [in] window_function  The windowing function-- should have
+                    been initialized using 'opts'.
+   @param [in,out] window  A vector of size opts.WindowSize().  Note:
+      it will typically be a sub-vector of a larger vector of size
+      opts.PaddedWindowSize(), with the remaining samples zero,
+      as the FFT code is more efficient if it operates on data with
+      power-of-two size.
+   @param [out]   log_energy_pre_window If non-NULL, then after
+      DC offset removal, this function will write to this pointer the log of
+      the total energy (i.e. sum-squared) of the frame.
+ */
+void ProcessWindow(const FrameExtractionOptions &opts,
+                   const FeatureWindowFunction &window_function, float *window,
+                   float *log_energy_pre_window = nullptr);
+
+// Compute the inner product of two vectors
+float InnerProduct(const float *a, const float *b, int32_t n);
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_WINDOW_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c
new file mode 100644
index 0000000..40242c0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/fftsg.c
@@ -0,0 +1,2968 @@
+/* This file is copied from
+ * https://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ */
+/*
+Fast Fourier/Cosine/Sine Transform
+    dimension   :one
+    data length :power of 2
+    decimation  :frequency
+    radix       :split-radix
+    data        :inplace
+    table       :use
+functions
+    cdft: Complex Discrete Fourier Transform
+    rdft: Real Discrete Fourier Transform
+    ddct: Discrete Cosine Transform
+    ddst: Discrete Sine Transform
+    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
+    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
+function prototypes
+    void cdft(int, int, double *, int *, double *);
+    void rdft(int, int, double *, int *, double *);
+    void ddct(int, int, double *, int *, double *);
+    void ddst(int, int, double *, int *, double *);
+    void dfct(int, double *, double *, int *, double *);
+    void dfst(int, double *, double *, int *, double *);
+macro definitions
+    USE_CDFT_PTHREADS : default=not defined
+        CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
+        CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
+    USE_CDFT_WINTHREADS : default=not defined
+        CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
+        CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
+
+
+-------- Complex DFT (Discrete Fourier Transform) --------
+    [definition]
+        <case1>
+            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
+        <case2>
+            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
+        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            cdft(2*n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            cdft(2*n, -1, a, ip, w);
+    [parameters]
+        2*n            :data length (int)
+                        n >= 1, n = power of 2
+        a[0...2*n-1]   :input/output data (double *)
+                        input data
+                            a[2*j] = Re(x[j]),
+                            a[2*j+1] = Im(x[j]), 0<=j<n
+                        output data
+                            a[2*k] = Re(X[k]),
+                            a[2*k+1] = Im(X[k]), 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            cdft(2*n, -1, a, ip, w);
+        is
+            cdft(2*n, 1, a, ip, w);
+            for (j = 0; j <= 2 * n - 1; j++) {
+                a[j] *= 1.0 / n;
+            }
+        .
+
+
+-------- Real DFT / Inverse of Real DFT --------
+    [definition]
+        <case1> RDFT
+            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
+        <case2> IRDFT (excluding scale)
+            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
+                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
+                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            rdft(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            rdft(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            output data
+                                a[2*k] = R[k], 0<=k<n/2
+                                a[2*k+1] = I[k], 0<k<n/2
+                                a[1] = R[n/2]
+                        <case2>
+                            input data
+                                a[2*j] = R[j], 0<=j<n/2
+                                a[2*j+1] = I[j], 0<j<n/2
+                                a[1] = R[n/2]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n/2-1]   :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            rdft(n, 1, a, ip, w);
+        is
+            rdft(n, -1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
+    [definition]
+        <case1> IDCT (excluding scale)
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DCT
+            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddct(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddct(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<n
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddct(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddct(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- DST (Discrete Sine Transform) / Inverse of DST --------
+    [definition]
+        <case1> IDST (excluding scale)
+            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
+        <case2> DST
+            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
+    [usage]
+        <case1>
+            ip[0] = 0; // first time only
+            ddst(n, 1, a, ip, w);
+        <case2>
+            ip[0] = 0; // first time only
+            ddst(n, -1, a, ip, w);
+    [parameters]
+        n              :data length (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        <case1>
+                            input data
+                                a[j] = A[j], 0<j<n
+                                a[0] = A[n]
+                            output data
+                                a[k] = S[k], 0<=k<n
+                        <case2>
+                            output data
+                                a[k] = S[k], 0<k<n
+                                a[0] = S[n]
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/2)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/4-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            ddst(n, -1, a, ip, w);
+        is
+            a[0] *= 0.5;
+            ddst(n, 1, a, ip, w);
+            for (j = 0; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
+    [definition]
+        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
+    [usage]
+        ip[0] = 0; // first time only
+        dfct(n, a, t, ip, w);
+    [parameters]
+        n              :data length - 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n]       :input/output data (double *)
+                        output data
+                            a[k] = C[k], 0<=k<=n
+        t[0...n/2]     :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+        is
+            a[0] *= 0.5;
+            a[n] *= 0.5;
+            dfct(n, a, t, ip, w);
+            for (j = 0; j <= n; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
+    [definition]
+        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
+    [usage]
+        ip[0] = 0; // first time only
+        dfst(n, a, t, ip, w);
+    [parameters]
+        n              :data length + 1 (int)
+                        n >= 2, n = power of 2
+        a[0...n-1]     :input/output data (double *)
+                        output data
+                            a[k] = S[k], 0<k<n
+                        (a[0] is used for work area)
+        t[0...n/2-1]   :work area (double *)
+        ip[0...*]      :work area for bit reversal (int *)
+                        length of ip >= 2+sqrt(n/4)
+                        strictly,
+                        length of ip >=
+                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
+                        ip[0],ip[1] are pointers of the cos/sin table.
+        w[0...n*5/8-1] :cos/sin table (double *)
+                        w[],ip[] are initialized if ip[0] == 0.
+    [remark]
+        Inverse of
+            dfst(n, a, t, ip, w);
+        is
+            dfst(n, a, t, ip, w);
+            for (j = 1; j <= n - 1; j++) {
+                a[j] *= 2.0 / n;
+            }
+        .
+
+
+Appendix :
+    The cos/sin table is recalculated when a larger table is required.
+    w[] and ip[] are compatible with all routines.
+*/
+
+
+
+void rdft(int n, int isgn, double *a, int *ip, double *w)
+{   /* in-place real DFT of a[0..n-1] if isgn >= 0, else its unscaled inverse */
+    void makewt(int nw, int *ip, double *w);   /* block-scope prototypes of */
+    void makect(int nc, int *ip, double *c);   /* the helpers called below  */
+    void cftfsub(int n, double *a, int *ip, int nw, double *w);
+    void cftbsub(int n, double *a, int *ip, int nw, double *w);
+    void rftfsub(int n, double *a, int nc, double *c);
+    void rftbsub(int n, double *a, int nc, double *c);
+    int nw, nc;
+    double xi;
+    /* ip[0] / ip[1] hold the nw / nc last stored by makewt() / makect(). */
+    nw = ip[0];
+    if (n > (nw << 2)) {    /* table too small for this n: (re)build it */
+        nw = n >> 2;
+        makewt(nw, ip, w);
+    }
+    nc = ip[1];
+    if (n > (nc << 2)) {    /* same check for the second table */
+        nc = n >> 2;
+        makect(nc, ip, w + nw);   /* placed in w[] right after the nw entries */
+    }
+    if (isgn >= 0) {
+        if (n > 4) {
+            cftfsub(n, a, ip, nw, w);
+            rftfsub(n, a, nc, w + nw);
+        } else if (n == 4) {
+            cftfsub(n, a, ip, nw, w);
+        }
+        xi = a[0] - a[1];   /* these three lines leave R[0] in a[0] and */
+        a[0] += a[1];       /* R[n/2] in a[1], matching the output layout */
+        a[1] = xi;          /* documented in the header comment */
+    } else {
+        a[1] = 0.5 * (a[0] - a[1]);   /* undo the R[0] / R[n/2] packing */
+        a[0] -= a[1];
+        if (n > 4) {
+            rftbsub(n, a, nc, w + nw);
+            cftbsub(n, a, ip, nw, w);
+        } else if (n == 4) {
+            cftbsub(n, a, ip, nw, w);
+        }
+    }
+}
+
+
+/* -------- initializing routines -------- */
+
+
+#include <math.h>
+
+void makewt(int nw, int *ip, double *w)
+{   /* fill w[] with the cos/sin table for size nw and record nw in ip[0] */
+    void makeipt(int nw, int *ip);
+    int j, nwh, nw0, nw1;
+    double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
+
+    ip[0] = nw;     /* table size, read back by rdft() as nw */
+    ip[1] = 1;      /* resets the value rdft() reads as nc, so makect() reruns for n > 4 */
+    if (nw > 2) {
+        nwh = nw >> 1;
+        delta = atan(1.0) / nwh;    /* atan(1.0) is pi/4 */
+        wn4r = cos(delta * nwh);    /* i.e. cos(pi/4) */
+        w[0] = 1;
+        w[1] = wn4r;
+        if (nwh == 4) {
+            w[2] = cos(delta * 2);
+            w[3] = sin(delta * 2);
+        } else if (nwh > 4) {
+            makeipt(nw, ip);
+            w[2] = 0.5 / cos(delta * 2);
+            w[3] = 0.5 / cos(delta * 6);
+            for (j = 4; j < nwh; j += 4) {   /* four entries per step */
+                w[j] = cos(delta * j);
+                w[j + 1] = sin(delta * j);
+                w[j + 2] = cos(3 * delta * j);
+                w[j + 3] = -sin(3 * delta * j);
+            }
+        }
+        nw0 = 0;
+        while (nwh > 2) {   /* append successively halved sub-tables */
+            nw1 = nw0 + nwh;    /* start of the next sub-table */
+            nwh >>= 1;
+            w[nw1] = 1;
+            w[nw1 + 1] = wn4r;
+            if (nwh == 4) {
+                wk1r = w[nw0 + 4];
+                wk1i = w[nw0 + 5];
+                w[nw1 + 2] = wk1r;
+                w[nw1 + 3] = wk1i;
+            } else if (nwh > 4) {
+                wk1r = w[nw0 + 4];
+                wk3r = w[nw0 + 6];
+                w[nw1 + 2] = 0.5 / wk1r;
+                w[nw1 + 3] = 0.5 / wk3r;
+                for (j = 4; j < nwh; j += 4) {   /* copy every other group of four */
+                    wk1r = w[nw0 + 2 * j];
+                    wk1i = w[nw0 + 2 * j + 1];
+                    wk3r = w[nw0 + 2 * j + 2];
+                    wk3i = w[nw0 + 2 * j + 3];
+                    w[nw1 + j] = wk1r;
+                    w[nw1 + j + 1] = wk1i;
+                    w[nw1 + j + 2] = wk3r;
+                    w[nw1 + j + 3] = wk3i;
+                }
+            }
+            nw0 = nw1;      /* the sub-table just written feeds the next pass */
+        }
+    }
+}
+
+
+void makeipt(int nw, int *ip)
+{   /* Build an integer offset table in ip[2..]; makewt() calls this when nw/2 > 4. */
+    int j, l, m, m2, p, q;
+    /* Seeds ip[2] and ip[3]; each pass then derives 2*m new entries from ip[m..2m-1]. */
+    ip[2] = 0;
+    ip[3] = 16;
+    m = 2;
+    for (l = nw; l > 32; l >>= 2) {  /* l is divided by 4 per pass; no pass runs once l <= 32 */
+        m2 = m << 1;
+        q = m2 << 3;  /* q = 8 * m2 = 16 * m */
+        for (j = m; j < m2; j++) {
+            p = ip[j] << 2;  /* existing entry scaled by 4 */
+            ip[m + j] = p;  /* lands in ip[2m..3m-1] */
+            ip[m2 + j] = p + q;  /* lands in ip[3m..4m-1] */
+        }
+        m = m2;
+    }
+}
+
+
+void makect(int nc, int *ip, double *c)
+{   /* Write a cosine/sine table into c[] (all indices written are below nc) and store nc in ip[1]. */
+    int j, nch;
+    double delta;
+    /* For nc <= 1 only ip[1] is written; c[] is left untouched. */
+    ip[1] = nc;
+    if (nc > 1) {
+        nch = nc >> 1;
+        delta = atan(1.0) / nch;  /* atan(1.0) is pi/4, so delta = pi / (4 * nch) */
+        c[0] = cos(delta * nch);  /* cos(pi/4), stored unscaled */
+        c[nch] = 0.5 * c[0];
+        for (j = 1; j < nch; j++) {
+            c[j] = 0.5 * cos(delta * j);  /* half-scaled cosines at ascending indices 1..nch-1 */
+            c[nc - j] = 0.5 * sin(delta * j);  /* half-scaled sines at descending indices nc-1.. */
+        }
+    }
+}
+
+
+/* -------- child routines -------- */
+
+
+#ifdef USE_CDFT_PTHREADS  /* opt-in: thread helpers implemented with POSIX threads */
+#define USE_CDFT_THREADS  /* turns on the cftrec4_th branches in cftfsub()/cftbsub() */
+#ifndef CDFT_THREADS_BEGIN_N  /* an earlier definition (e.g. from the build) takes precedence */
+#define CDFT_THREADS_BEGIN_N 8192  /* cftfsub()/cftbsub() call cftrec4_th only when n exceeds this */
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 65536  /* NOTE(review): not referenced in this part of the file; presumably a second, higher threshold -- confirm */
+#endif
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t pthread_t
+#define cdft_thread_create(thp,func,argp) { /* start func(argp) with pthread_create; on failure print a message and exit(1) */ \
+    if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#define cdft_thread_wait(th) { /* pthread_join the thread, discarding its result; on failure print a message and exit(1) */ \
+    if (pthread_join(th, NULL) != 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#endif /* USE_CDFT_PTHREADS */
+
+
+#ifdef USE_CDFT_WINTHREADS  /* opt-in: thread helpers implemented with Win32 threads */
+#define USE_CDFT_THREADS  /* turns on the cftrec4_th branches in cftfsub()/cftbsub() */
+#ifndef CDFT_THREADS_BEGIN_N  /* an earlier definition (e.g. from the build) takes precedence */
+#define CDFT_THREADS_BEGIN_N 32768  /* cftfsub()/cftbsub() call cftrec4_th only when n exceeds this */
+#endif
+#ifndef CDFT_4THREADS_BEGIN_N
+#define CDFT_4THREADS_BEGIN_N 524288  /* NOTE(review): not referenced in this part of the file; presumably a second, higher threshold -- confirm */
+#endif
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define cdft_thread_t HANDLE
+#define cdft_thread_create(thp,func,argp) { /* start func(argp) with CreateThread; on a null handle print a message and exit(1) */ \
+    DWORD thid; \
+    *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
+    if (*(thp) == 0) { \
+        fprintf(stderr, "cdft thread error\n"); \
+        exit(1); \
+    } \
+}
+#define cdft_thread_wait(th) { /* block until the thread ends, then close its handle; return values are not checked */ \
+    WaitForSingleObject(th, INFINITE); \
+    CloseHandle(th); \
+}
+#endif /* USE_CDFT_WINTHREADS */
+
+
+void cftfsub(int n, double *a, int *ip, int nw, double *w)
+{   /* Size-based dispatcher: chooses which transform and reordering routines run on a[] for length n. */
+    void bitrv2(int n, int *ip, double *a);
+    void bitrv216(double *a);
+    void bitrv208(double *a);
+    void cftf1st(int n, double *a, double *w);
+    void cftrec4(int n, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftfx41(int n, double *a, int nw, double *w);
+    void cftf161(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftf040(double *a);
+    void cftx020(double *a);
+#ifdef USE_CDFT_THREADS
+    void cftrec4_th(int n, double *a, int nw, double *w);
+#endif /* USE_CDFT_THREADS */
+    /* Values of n matched by no branch below (n < 4, or 4 < n < 8) leave a[] untouched. */
+    if (n > 8) {
+        if (n > 32) {
+            cftf1st(n, a, &w[nw - (n >> 2)]);  /* table pointer starts n/4 entries before w[nw] */
+#ifdef USE_CDFT_THREADS
+            if (n > CDFT_THREADS_BEGIN_N) {
+                cftrec4_th(n, a, nw, w);
+            } else  /* this else binds to the if (n > 512) chain that follows the #endif */
+#endif /* USE_CDFT_THREADS */
+            if (n > 512) {
+                cftrec4(n, a, nw, w);
+            } else if (n > 128) {
+                cftleaf(n, 1, a, nw, w);
+            } else {
+                cftfx41(n, a, nw, w);
+            }
+            bitrv2(n, ip, a);  /* reordering pass driven by the ip[] table, run after every n > 32 path */
+        } else if (n == 32) {
+            cftf161(a, &w[nw - 8]);  /* table pointer starts 8 entries before w[nw] */
+            bitrv216(a);
+        } else {  /* 8 < n < 32; presumably n == 16 in practice -- TODO confirm callers pass powers of two */
+            cftf081(a, w);
+            bitrv208(a);
+        }
+    } else if (n == 8) {
+        cftf040(a);
+    } else if (n == 4) {
+        cftx020(a);
+    }
+}
+
+
+void cftbsub(int n, double *a, int *ip, int nw, double *w)
+{   /* Size-based dispatcher with the same branch layout as cftfsub(), using cftb1st/cftb040 and the conj/neg reorderings. */
+    void bitrv2conj(int n, int *ip, double *a);
+    void bitrv216neg(double *a);
+    void bitrv208neg(double *a);
+    void cftb1st(int n, double *a, double *w);
+    void cftrec4(int n, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftfx41(int n, double *a, int nw, double *w);
+    void cftf161(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftb040(double *a);
+    void cftx020(double *a);
+#ifdef USE_CDFT_THREADS
+    void cftrec4_th(int n, double *a, int nw, double *w);
+#endif /* USE_CDFT_THREADS */
+    /* Values of n matched by no branch below (n < 4, or 4 < n < 8) leave a[] untouched. */
+    if (n > 8) {
+        if (n > 32) {
+            cftb1st(n, a, &w[nw - (n >> 2)]);  /* table pointer starts n/4 entries before w[nw] */
+#ifdef USE_CDFT_THREADS
+            if (n > CDFT_THREADS_BEGIN_N) {
+                cftrec4_th(n, a, nw, w);
+            } else  /* this else binds to the if (n > 512) chain that follows the #endif */
+#endif /* USE_CDFT_THREADS */
+            if (n > 512) {
+                cftrec4(n, a, nw, w);
+            } else if (n > 128) {
+                cftleaf(n, 1, a, nw, w);
+            } else {
+                cftfx41(n, a, nw, w);
+            }
+            bitrv2conj(n, ip, a);  /* reordering pass that also negates the second element of each pair */
+        } else if (n == 32) {
+            cftf161(a, &w[nw - 8]);  /* the same cftf161/cftf081 kernels as cftfsub(); only the reordering differs */
+            bitrv216neg(a);
+        } else {  /* 8 < n < 32; presumably n == 16 in practice -- TODO confirm callers pass powers of two */
+            cftf081(a, w);
+            bitrv208neg(a);
+        }
+    } else if (n == 8) {
+        cftb040(a);
+    } else if (n == 4) {
+        cftx020(a);  /* the n == 4 case uses the same routine as cftfsub() */
+    }
+}
+
+
+void bitrv2(int n, int *ip, double *a)
+{   /* Reorder a[] in place by exchanging two-element pairs at offsets looked up in ip[m..2m-1]. */
+    int j, j1, k, k1, l, m, nh, nm;
+    double xr, xi, yr, yi;
+    /* Every 8-statement group below swaps the pair (a[j1], a[j1+1]) with the pair (a[k1], a[k1+1]). */
+    m = 1;
+    for (l = n >> 2; l > 8; l >>= 2) {  /* m doubles each time l (starting at n/4) is divided by 4 */
+        m <<= 1;
+    }
+    nh = n >> 1;  /* half the array length, used as an index stride */
+    nm = 4 * m;  /* second index stride */
+    if (l == 8) {  /* this variant doubles the ip[] offsets and does 16 swaps per (j, k) */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {  /* off-diagonal index pairs, j < k */
+                j1 = 4 * j + 2 * ip[m + k];
+                k1 = 4 * k + 2 * ip[m + j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + 2 * ip[m + k];  /* diagonal case (row k paired with itself): 6 swaps */
+            j1 = k1 + 2;
+            k1 += nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 += 2 * nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 -= nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= 2;
+            k1 -= nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nh + 2;
+            k1 += nh + 2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= nh - nm;
+            k1 += 2 * nm - 2;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    } else {  /* l != 8 after the loop: ip[] offsets used as-is, 8 swaps per (j, k) */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {  /* off-diagonal index pairs, j < k */
+                j1 = 4 * j + ip[m + k];
+                k1 = 4 * k + ip[m + j];
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = a[j1 + 1];
+                yr = a[k1];
+                yi = a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + ip[m + k];  /* diagonal case (row k paired with itself): 2 swaps */
+            j1 = k1 + 2;
+            k1 += nh;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 += nm;
+            xr = a[j1];
+            xi = a[j1 + 1];
+            yr = a[k1];
+            yi = a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+        }
+    }
+}
+
+
+void bitrv2conj(int n, int *ip, double *a)
+{   /* Same pair-swapping walk as bitrv2(), but the second element of each pair is negated as it is moved. */
+    int j, j1, k, k1, l, m, nh, nm;
+    double xr, xi, yr, yi;
+    /* Presumably pairs are interleaved (re, im) values, making the sign flip a complex conjugate -- confirm with callers. */
+    m = 1;
+    for (l = n >> 2; l > 8; l >>= 2) {  /* m doubles each time l (starting at n/4) is divided by 4 */
+        m <<= 1;
+    }
+    nh = n >> 1;  /* half the array length, used as an index stride */
+    nm = 4 * m;  /* second index stride */
+    if (l == 8) {  /* this variant doubles the ip[] offsets and does 16 swaps per (j, k) */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {  /* off-diagonal index pairs, j < k */
+                j1 = 4 * j + 2 * ip[m + k];
+                k1 = 4 * k + 2 * ip[m + j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= 2 * nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + 2 * ip[m + k];  /* diagonal case (row k paired with itself): 6 swaps plus 4 in-place negations */
+            j1 = k1 + 2;
+            k1 += nh;
+            a[j1 - 1] = -a[j1 - 1];  /* the pair just before j1 is not swapped, so its second element is negated in place */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];  /* likewise for the pair just after k1 */
+            j1 += nm;
+            k1 += 2 * nm;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nm;
+            k1 -= nm;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= 2;
+            k1 -= nh;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 += nh + 2;
+            k1 += nh + 2;
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            j1 -= nh - nm;
+            k1 += 2 * nm - 2;
+            a[j1 - 1] = -a[j1 - 1];  /* in-place negation of an unswapped pair's second element */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];  /* in-place negation of an unswapped pair's second element */
+        }
+    } else {  /* l != 8 after the loop: ip[] offsets used as-is, 8 swaps per (j, k) */
+        for (k = 0; k < m; k++) {
+            for (j = 0; j < k; j++) {  /* off-diagonal index pairs, j < k */
+                j1 = 4 * j + ip[m + k];
+                k1 = 4 * k + ip[m + j];
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nh;
+                k1 += 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += 2;
+                k1 += nh;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 += nm;
+                k1 += nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nh;
+                k1 -= 2;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+                j1 -= nm;
+                k1 -= nm;
+                xr = a[j1];
+                xi = -a[j1 + 1];
+                yr = a[k1];
+                yi = -a[k1 + 1];
+                a[j1] = yr;
+                a[j1 + 1] = yi;
+                a[k1] = xr;
+                a[k1 + 1] = xi;
+            }
+            k1 = 4 * k + ip[m + k];  /* diagonal case (row k paired with itself): 2 swaps plus 4 in-place negations */
+            j1 = k1 + 2;
+            k1 += nh;
+            a[j1 - 1] = -a[j1 - 1];  /* the pair just before j1 is not swapped, so its second element is negated in place */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];  /* likewise for the pair just after k1 */
+            j1 += nm;
+            k1 += nm;
+            a[j1 - 1] = -a[j1 - 1];  /* in-place negation of an unswapped pair's second element */
+            xr = a[j1];
+            xi = -a[j1 + 1];
+            yr = a[k1];
+            yi = -a[k1 + 1];
+            a[j1] = yr;
+            a[j1 + 1] = yi;
+            a[k1] = xr;
+            a[k1 + 1] = xi;
+            a[k1 + 3] = -a[k1 + 3];  /* in-place negation of an unswapped pair's second element */
+        }
+    }
+}
+
+
+void bitrv216(double *a)
+{   /* Fixed permutation of a[0..31] viewed as 16 two-element pairs: pair k trades places with pair bitrev4(k). */
+    double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
+        x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
+        x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
+    /* Pairs 0, 6, 9 and 15 equal their own 4-bit reversal, so they are neither read nor written. */
+    x1r = a[2];  /* xKr/xKi hold the original pair K, i.e. a[2K] and a[2K+1] */
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x7r = a[14];
+    x7i = a[15];
+    x8r = a[16];
+    x8i = a[17];
+    x10r = a[20];
+    x10i = a[21];
+    x11r = a[22];
+    x11i = a[23];
+    x12r = a[24];
+    x12i = a[25];
+    x13r = a[26];
+    x13i = a[27];
+    x14r = a[28];
+    x14i = a[29];
+    a[2] = x8r;  /* write-back phase: swaps 1<->8, 2<->4, 3<->12, 5<->10, 7<->14, 11<->13 */
+    a[3] = x8i;
+    a[4] = x4r;
+    a[5] = x4i;
+    a[6] = x12r;
+    a[7] = x12i;
+    a[8] = x2r;
+    a[9] = x2i;
+    a[10] = x10r;
+    a[11] = x10i;
+    a[14] = x14r;
+    a[15] = x14i;
+    a[16] = x1r;
+    a[17] = x1i;
+    a[20] = x5r;
+    a[21] = x5i;
+    a[22] = x13r;
+    a[23] = x13i;
+    a[24] = x3r;
+    a[25] = x3i;
+    a[26] = x11r;
+    a[27] = x11i;
+    a[28] = x7r;
+    a[29] = x7i;
+}
+
+
+void bitrv216neg(double *a)
+{   /* Fixed permutation of a[0..31] as 16 pairs: new pair k = old pair bitrev4((16 - k) mod 16). */
+    double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
+        x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
+        x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
+        x13r, x13i, x14r, x14i, x15r, x15i;
+    /* Pair 0 (a[0], a[1]) maps to itself and is not touched; values are moved without any sign change. */
+    x1r = a[2];  /* xKr/xKi hold the original pair K, i.e. a[2K] and a[2K+1] */
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x6r = a[12];
+    x6i = a[13];
+    x7r = a[14];
+    x7i = a[15];
+    x8r = a[16];
+    x8i = a[17];
+    x9r = a[18];
+    x9i = a[19];
+    x10r = a[20];
+    x10i = a[21];
+    x11r = a[22];
+    x11i = a[23];
+    x12r = a[24];
+    x12i = a[25];
+    x13r = a[26];
+    x13i = a[27];
+    x14r = a[28];
+    x14i = a[29];
+    x15r = a[30];
+    x15i = a[31];
+    a[2] = x15r;  /* write-back phase: every pair 1..15 is rewritten */
+    a[3] = x15i;
+    a[4] = x7r;
+    a[5] = x7i;
+    a[6] = x11r;
+    a[7] = x11i;
+    a[8] = x3r;
+    a[9] = x3i;
+    a[10] = x13r;
+    a[11] = x13i;
+    a[12] = x5r;
+    a[13] = x5i;
+    a[14] = x9r;
+    a[15] = x9i;
+    a[16] = x1r;
+    a[17] = x1i;
+    a[18] = x14r;
+    a[19] = x14i;
+    a[20] = x6r;
+    a[21] = x6i;
+    a[22] = x10r;
+    a[23] = x10i;
+    a[24] = x2r;
+    a[25] = x2i;
+    a[26] = x12r;
+    a[27] = x12i;
+    a[28] = x4r;
+    a[29] = x4i;
+    a[30] = x8r;
+    a[31] = x8i;
+}
+
+
+void bitrv208(double *a)
+{   /* Fixed permutation of a[0..15] viewed as 8 two-element pairs: pair k trades places with pair bitrev3(k). */
+    double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
+    /* Only the swaps 1<->4 and 3<->6 are needed; pairs 0, 2, 5 and 7 equal their own 3-bit reversal. */
+    x1r = a[2];  /* xKr/xKi hold the original pair K, i.e. a[2K] and a[2K+1] */
+    x1i = a[3];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x6r = a[12];
+    x6i = a[13];
+    a[2] = x4r;
+    a[3] = x4i;
+    a[6] = x6r;
+    a[7] = x6i;
+    a[8] = x1r;
+    a[9] = x1i;
+    a[12] = x3r;
+    a[13] = x3i;
+}
+
+
+void bitrv208neg(double *a)
+{   /* Fixed permutation of a[0..15] as 8 pairs: new pair k = old pair bitrev3((8 - k) mod 8). */
+    double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
+        x5r, x5i, x6r, x6i, x7r, x7i;
+    /* Pair 0 (a[0], a[1]) maps to itself and is not touched; values are moved without any sign change. */
+    x1r = a[2];  /* xKr/xKi hold the original pair K, i.e. a[2K] and a[2K+1] */
+    x1i = a[3];
+    x2r = a[4];
+    x2i = a[5];
+    x3r = a[6];
+    x3i = a[7];
+    x4r = a[8];
+    x4i = a[9];
+    x5r = a[10];
+    x5i = a[11];
+    x6r = a[12];
+    x6i = a[13];
+    x7r = a[14];
+    x7i = a[15];
+    a[2] = x7r;  /* write-back phase: every pair 1..7 is rewritten */
+    a[3] = x7i;
+    a[4] = x3r;
+    a[5] = x3i;
+    a[6] = x5r;
+    a[7] = x5i;
+    a[8] = x1r;
+    a[9] = x1i;
+    a[10] = x6r;
+    a[11] = x6i;
+    a[12] = x2r;
+    a[13] = x2i;
+    a[14] = x4r;
+    a[15] = x4i;
+}
+
+
+void cftf1st(int n, double *a, double *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
+        wd1r, wd1i, wd3r, wd3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+
+    mh = n >> 3;
+    m = 2 * mh;
+    j1 = m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];
+    x0i = a[1] + a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = a[1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    a[j2] = x1r - x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    csc1 = w[2];
+    csc3 = w[3];
+    wd1r = 1;
+    wd1i = 0;
+    wd3r = 1;
+    wd3i = 0;
+    k = 0;
+    for (j = 2; j < mh - 2; j += 4) {
+        k += 4;
+        wk1r = csc1 * (wd1r + w[k]);
+        wk1i = csc1 * (wd1i + w[k + 1]);
+        wk3r = csc3 * (wd3r + w[k + 2]);
+        wk3i = csc3 * (wd3i + w[k + 3]);
+        wd1r = w[k];
+        wd1i = w[k + 1];
+        wd3r = w[k + 2];
+        wd3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];
+        x0i = a[j + 1] + a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = a[j + 1] - a[j2 + 1];
+        y0r = a[j + 2] + a[j2 + 2];
+        y0i = a[j + 3] + a[j2 + 3];
+        y1r = a[j + 2] - a[j2 + 2];
+        y1i = a[j + 3] - a[j2 + 3];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 + 2] + a[j3 + 2];
+        y2i = a[j1 + 3] + a[j3 + 3];
+        y3r = a[j1 + 2] - a[j3 + 2];
+        y3i = a[j1 + 3] - a[j3 + 3];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j + 2] = y0r + y2r;
+        a[j + 3] = y0i + y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        a[j1 + 2] = y0r - y2r;
+        a[j1 + 3] = y0i - y2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i + y3r;
+        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i - y3r;
+        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+        j0 = m - j;
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = a[j0 + 1] + a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = a[j0 + 1] - a[j2 + 1];
+        y0r = a[j0 - 2] + a[j2 - 2];
+        y0i = a[j0 - 1] + a[j2 - 1];
+        y1r = a[j0 - 2] - a[j2 - 2];
+        y1i = a[j0 - 1] - a[j2 - 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 - 2] + a[j3 - 2];
+        y2i = a[j1 - 1] + a[j3 - 1];
+        y3r = a[j1 - 2] - a[j3 - 2];
+        y3i = a[j1 - 1] - a[j3 - 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i + x2i;
+        a[j0 - 2] = y0r + y2r;
+        a[j0 - 1] = y0i + y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        a[j1 - 2] = y0r - y2r;
+        a[j1 - 1] = y0i - y2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i + y3r;
+        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i - y3r;
+        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+    }
+    wk1r = csc1 * (wd1r + wn4r);
+    wk1i = csc1 * (wd1i + wn4r);
+    wk3r = csc3 * (wd3r - wn4r);
+    wk3i = csc3 * (wd3i - wn4r);
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0 - 2] + a[j2 - 2];
+    x0i = a[j0 - 1] + a[j2 - 1];
+    x1r = a[j0 - 2] - a[j2 - 2];
+    x1i = a[j0 - 1] - a[j2 - 1];
+    x2r = a[j1 - 2] + a[j3 - 2];
+    x2i = a[j1 - 1] + a[j3 - 1];
+    x3r = a[j1 - 2] - a[j3 - 2];
+    x3i = a[j1 - 1] - a[j3 - 1];
+    a[j0 - 2] = x0r + x2r;
+    a[j0 - 1] = x0i + x2i;
+    a[j1 - 2] = x0r - x2r;
+    a[j1 - 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+    x0r = a[j0] + a[j2];
+    x0i = a[j0 + 1] + a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = a[j0 + 1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+    x0r = a[j0 + 2] + a[j2 + 2];
+    x0i = a[j0 + 3] + a[j2 + 3];
+    x1r = a[j0 + 2] - a[j2 + 2];
+    x1i = a[j0 + 3] - a[j2 + 3];
+    x2r = a[j1 + 2] + a[j3 + 2];
+    x2i = a[j1 + 3] + a[j3 + 3];
+    x3r = a[j1 + 2] - a[j3 + 2];
+    x3i = a[j1 + 3] - a[j3 + 3];
+    a[j0 + 2] = x0r + x2r;
+    a[j0 + 3] = x0i + x2i;
+    a[j1 + 2] = x0r - x2r;
+    a[j1 + 3] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+/*
+ * cftb1st: one radix-4 butterfly pass over a[0..n-1], performed in place.
+ *
+ * a holds interleaved (real, imaginary) pairs and is treated as four
+ * quarters starting at offsets 0, m, 2m and 3m, where m = 2 * (n >> 3).
+ * The imaginary parts read from the first and third quarters are negated
+ * on load; apart from those sign changes the arithmetic mirrors the
+ * non-negating code of the function directly above this one.
+ * NOTE(review): the name suggests this is the "backward" first stage --
+ * confirm against the callers.
+ *
+ *   n - number of doubles of a covered by this pass
+ *   a - data array, overwritten with the result
+ *   w - coefficient table: w[1], w[2], w[3] are read as wn4r, csc1, csc3,
+ *       and four further entries w[k..k+3] are read per loop iteration
+ */
+void cftb1st(int n, double *a, double *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
+        wd1r, wd1i, wd3r, wd3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
+
+    mh = n >> 3;    /* n / 8 */
+    m = 2 * mh;     /* length of one quarter, in doubles */
+    j1 = m;         /* j1, j2, j3: same position in the 2nd, 3rd, 4th quarter */
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];         /* point 0: butterfly without any table factor */
+    x0i = -a[1] - a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = -a[1] + a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i - x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i + x2i;
+    a[j2] = x1r + x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r - x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    csc1 = w[2];
+    csc3 = w[3];
+    wd1r = 1;       /* wd*: table entries of the previous pass, starting at (1, 0) */
+    wd1i = 0;
+    wd3r = 1;
+    wd3i = 0;
+    k = 0;
+    for (j = 2; j < mh - 2; j += 4) {   /* each pass handles points j and j + 2 plus their mirrors */
+        k += 4;
+        wk1r = csc1 * (wd1r + w[k]);    /* wk*: scaled sum of previous and current table entries */
+        wk1i = csc1 * (wd1i + w[k + 1]);
+        wk3r = csc3 * (wd3r + w[k + 2]);
+        wk3i = csc3 * (wd3i + w[k + 3]);
+        wd1r = w[k];                    /* wd*: current table entries, used for point j + 2 */
+        wd1i = w[k + 1];
+        wd3r = w[k + 2];
+        wd3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];             /* x*: point j, y*: point j + 2 */
+        x0i = -a[j + 1] - a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = -a[j + 1] + a[j2 + 1];
+        y0r = a[j + 2] + a[j2 + 2];
+        y0i = -a[j + 3] - a[j2 + 3];
+        y1r = a[j + 2] - a[j2 + 2];
+        y1i = -a[j + 3] + a[j2 + 3];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 + 2] + a[j3 + 2];
+        y2i = a[j1 + 3] + a[j3 + 3];
+        y3r = a[j1 + 2] - a[j3 + 2];
+        y3i = a[j1 + 3] - a[j3 + 3];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i - x2i;
+        a[j + 2] = y0r + y2r;
+        a[j + 3] = y0i - y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i + x2i;
+        a[j1 + 2] = y0r - y2r;
+        a[j1 + 3] = y0i + y2i;
+        x0r = x1r + x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;        /* complex multiply by (wk1r, wk1i) */
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i + y3r;
+        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
+        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;        /* complex multiply by the conjugate of (wk3r, wk3i) */
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i - y3r;
+        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
+        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
+        j0 = m - j;                             /* mirrored points m - j and m - j - 2 */
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = -a[j0 + 1] - a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = -a[j0 + 1] + a[j2 + 1];
+        y0r = a[j0 - 2] + a[j2 - 2];
+        y0i = -a[j0 - 1] - a[j2 - 1];
+        y1r = a[j0 - 2] - a[j2 - 2];
+        y1i = -a[j0 - 1] + a[j2 - 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        y2r = a[j1 - 2] + a[j3 - 2];
+        y2i = a[j1 - 1] + a[j3 - 1];
+        y3r = a[j1 - 2] - a[j3 - 2];
+        y3i = a[j1 - 1] - a[j3 - 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i - x2i;
+        a[j0 - 2] = y0r + y2r;
+        a[j0 - 1] = y0i - y2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i + x2i;
+        a[j1 - 2] = y0r - y2r;
+        a[j1 - 1] = y0i + y2i;
+        x0r = x1r + x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;        /* same factors with the r and i roles swapped */
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = y1r + y3i;
+        x0i = y1i + y3r;
+        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
+        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
+        x0r = x1r - x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+        x0r = y1r - y3i;
+        x0i = y1i - y3r;
+        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
+        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
+    }
+    wk1r = csc1 * (wd1r + wn4r);        /* last factors: final wd* combined with wn4r */
+    wk1i = csc1 * (wd1i + wn4r);
+    wk3r = csc3 * (wd3r - wn4r);
+    wk3i = csc3 * (wd3i - wn4r);
+    j0 = mh;                            /* middle of the quarter: points j0 - 2, j0, j0 + 2 */
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0 - 2] + a[j2 - 2];
+    x0i = -a[j0 - 1] - a[j2 - 1];
+    x1r = a[j0 - 2] - a[j2 - 2];
+    x1i = -a[j0 - 1] + a[j2 - 1];
+    x2r = a[j1 - 2] + a[j3 - 2];
+    x2i = a[j1 - 1] + a[j3 - 1];
+    x3r = a[j1 - 2] - a[j3 - 2];
+    x3i = a[j1 - 1] - a[j3 - 1];
+    a[j0 - 2] = x0r + x2r;
+    a[j0 - 1] = x0i - x2i;
+    a[j1 - 2] = x0r - x2r;
+    a[j1 - 1] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
+    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
+    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
+    x0r = a[j0] + a[j2];
+    x0i = -a[j0 + 1] - a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = -a[j0 + 1] + a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i - x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);         /* centre point j0 needs only wn4r */
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+    x0r = a[j0 + 2] + a[j2 + 2];
+    x0i = -a[j0 + 3] - a[j2 + 3];
+    x1r = a[j0 + 2] - a[j2 + 2];
+    x1i = -a[j0 + 3] + a[j2 + 3];
+    x2r = a[j1 + 2] + a[j3 + 2];
+    x2i = a[j1 + 3] + a[j3 + 3];
+    x3r = a[j1 + 2] - a[j3 + 2];
+    x3i = a[j1 + 3] - a[j3 + 3];
+    a[j0 + 2] = x0r + x2r;
+    a[j0 + 3] = x0i - x2i;
+    a[j1 + 2] = x0r - x2r;
+    a[j1 + 3] = x0i + x2i;
+    x0r = x1r + x3i;
+    x0i = x1i + x3r;
+    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
+    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i - x3r;
+    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
+    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
+}
+
+
+#ifdef USE_CDFT_THREADS
+/*
+ * Per-thread argument bundle: cftrec4_th fills one of these for every
+ * worker it starts, and cftrec1_th / cftrec2_th unpack it again.
+ */
+typedef struct cdft_arg_st {
+    int n0;     /* length passed to cftrec4_th (the whole range being split) */
+    int n;      /* length of the slice given to this worker */
+    double *a;  /* first element of this worker's slice */
+    int nw;     /* passed through unchanged; used as w[nw - ...] by the workers */
+    double *w;  /* coefficient table, shared by all workers */
+} cdft_arg_t;
+
+
+/*
+ * cftrec4_th: split a[0..n-1] into equal slices and process each slice on
+ * its own thread, then wait for all of them.
+ *
+ * Two slices of n/2 are used by default; when n exceeds
+ * CDFT_4THREADS_BEGIN_N four slices of n/4 are used instead.  Exactly one
+ * slice (index 0 with two threads, index 1 with four) is handled by
+ * cftrec2_th; every other slice is handled by cftrec1_th.
+ */
+void cftrec4_th(int n, double *a, int nw, double *w)
+{
+    void *cftrec1_th(void *p);
+    void *cftrec2_th(void *p);
+    cdft_thread_t th[4];
+    cdft_arg_t ag[4];
+    void *(*entry)(void *);
+    int four_way = (n > CDFT_4THREADS_BEGIN_N);
+    int nthread = four_way ? 4 : 2;
+    int idiv4 = four_way ? 1 : 0;
+    int m = n >> 1;
+    int i;
+
+    if (four_way) {
+        m >>= 1;
+    }
+    for (i = 0; i < nthread; i++) {
+        ag[i].n0 = n;
+        ag[i].n = m;
+        ag[i].a = &a[i * m];
+        ag[i].nw = nw;
+        ag[i].w = w;
+        /* slot idiv4 is the only one that runs the cftrec2_th variant */
+        entry = (i == idiv4) ? cftrec2_th : cftrec1_th;
+        cdft_thread_create(&th[i], entry, &ag[i]);
+    }
+    i = 0;
+    while (i < nthread) {
+        cdft_thread_wait(th[i]);
+        i++;
+    }
+}
+
+
+/*
+ * cftrec1_th: thread entry point.  p points to a cdft_arg_t describing
+ * the slice a[0..n-1] to process and the length n0 of the whole range.
+ *
+ * Starting from n0, the block length is divided by four (applying cftmdl1
+ * to the tail of the slice each time) until it is at most 512; the slice
+ * is then processed block by block from its end towards its start.
+ * Always returns a null pointer.
+ */
+void *cftrec1_th(void *p)
+{
+    int cfttree(int n, int j, int k, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftmdl1(int n, double *a, double *w);
+    cdft_arg_t *arg = (cdft_arg_t *) p;
+    int n = arg->n;
+    int nw = arg->nw;
+    double *a = arg->a;
+    double *w = arg->w;
+    int block, offset, count, split;
+
+    for (block = arg->n0; block > 512; ) {
+        block >>= 2;
+        cftmdl1(block, a + (n - block), w + (nw - (block >> 1)));
+    }
+    cftleaf(block, 1, a + (n - block), nw, w);
+    /* count is the 1-based number of the block being finished */
+    for (offset = n - block, count = 1; offset > 0; offset -= block, count++) {
+        split = cfttree(block, offset, count, a, nw, w);
+        cftleaf(block, split, a + (offset - block), nw, w);
+    }
+    return (void *) 0;
+}
+
+
+/*
+ * cftrec2_th: thread entry point, counterpart of cftrec1_th that uses
+ * cftmdl2 while shrinking the block length and starts its block counter
+ * from a value derived from the number of shrink steps.
+ *
+ * p points to a cdft_arg_t describing the slice a[0..n-1] to process and
+ * the length n0 of the whole range.  Always returns a null pointer.
+ */
+void *cftrec2_th(void *p)
+{
+    int cfttree(int n, int j, int k, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftmdl2(int n, double *a, double *w);
+    cdft_arg_t *arg = (cdft_arg_t *) p;
+    int n = arg->n;
+    int nw = arg->nw;
+    double *a = arg->a;
+    double *w = arg->w;
+    int block = arg->n0;
+    int count = 1;
+    int offset, split;
+
+    while (block > 512) {
+        block >>= 2;
+        count <<= 2;    /* multiplied by four for every shrink step */
+        cftmdl2(block, a + (n - block), w + (nw - block));
+    }
+    cftleaf(block, 0, a + (n - block), nw, w);
+    /* the first block handled by the loop gets number (count / 2) + 1 */
+    count = (count >> 1) + 1;
+    for (offset = n - block; offset > 0; offset -= block, count++) {
+        split = cfttree(block, offset, count, a, nw, w);
+        cftleaf(block, split, a + (offset - block), nw, w);
+    }
+    return (void *) 0;
+}
+#endif /* USE_CDFT_THREADS */
+
+
+/*
+ * cftrec4: process a[0..n-1] in blocks of at most 512 doubles.
+ *
+ * The block length starts at n and is divided by four, applying cftmdl1
+ * to the tail of the array each time, until it is at most 512.  The last
+ * block is finished with cftleaf; the remaining blocks are then handled
+ * from the end of the array towards its start, with cfttree deciding the
+ * split flag that is handed to cftleaf.
+ */
+void cftrec4(int n, double *a, int nw, double *w)
+{
+    int cfttree(int n, int j, int k, double *a, int nw, double *w);
+    void cftleaf(int n, int isplt, double *a, int nw, double *w);
+    void cftmdl1(int n, double *a, double *w);
+    int block, offset, count, split;
+
+    for (block = n; block > 512; ) {
+        block >>= 2;
+        cftmdl1(block, a + (n - block), w + (nw - (block >> 1)));
+    }
+    cftleaf(block, 1, a + (n - block), nw, w);
+    /* count is the 1-based number of the block being finished */
+    for (offset = n - block, count = 1; offset > 0; offset -= block, count++) {
+        split = cfttree(block, offset, count, a, nw, w);
+        cftleaf(block, split, a + (offset - block), nw, w);
+    }
+}
+
+
+/*
+ * cfttree: apply the cftmdl1 / cftmdl2 passes that must precede the leaf
+ * ending at a[j], and report which of the two kinds was used.
+ *
+ *   n  - block length
+ *   j  - index just past the block(s) to process (blocks end at a[j - 1])
+ *   k  - block counter supplied by the caller; must be non-zero
+ *   nw - the coefficient table is addressed as w[nw - ...]
+ *
+ * Returns 1 when cftmdl1 was selected and 0 when cftmdl2 was selected.
+ */
+int cfttree(int n, int j, int k, double *a, int nw, double *w)
+{
+    void cftmdl1(int n, double *a, double *w);
+    void cftmdl2(int n, double *a, double *w);
+    int i, isplt, m;
+
+    /* k not a multiple of 4: a single pass over one block of length n */
+    if ((k & 3) != 0) {
+        if ((k & 1) != 0) {
+            cftmdl1(n, a + (j - n), w + (nw - (n >> 1)));
+            return 1;
+        }
+        cftmdl2(n, a + (j - n), w + (nw - n));
+        return 0;
+    }
+
+    /* strip factors of 4 from k, growing the span by 4 for each one */
+    m = n;
+    i = k;
+    while ((i & 3) == 0) {
+        i >>= 2;
+        m <<= 2;
+    }
+    isplt = i & 1;
+    /* passes over shrinking spans, all ending at a[j - 1] */
+    for (; m > 128; m >>= 2) {
+        if (isplt != 0) {
+            cftmdl1(m, a + (j - m), w + (nw - (m >> 1)));
+        } else {
+            cftmdl2(m, a + (j - m), w + (nw - m));
+        }
+    }
+    return isplt;
+}
+
+
+/*
+ * cftleaf: finish one leaf block of a in place using a fixed sequence of
+ * kernel calls.
+ *
+ * n == 512 selects the sequence built from cftmdl1/cftmdl2 with length 128
+ * plus the 32-double kernels cftf161/cftf162; any other n selects the
+ * sequence built from length 64 plus the 16-double kernels
+ * cftf081/cftf082 (which touches a[0..255]).
+ *
+ * isplt only affects the last quarter of the block: non-zero picks the
+ * cftmdl1-style pair of calls there, zero picks the cftmdl2-style pair.
+ */
+void cftleaf(int n, int isplt, double *a, int nw, double *w)
+{
+    void cftmdl1(int n, double *a, double *w);
+    void cftmdl2(int n, double *a, double *w);
+    void cftf161(double *a, double *w);
+    void cftf162(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftf082(double *a, double *w);
+    /* table positions shared by both sequences */
+    double *w8 = w + (nw - 8);
+    double *w32 = w + (nw - 32);
+    double *w64 = w + (nw - 64);
+    double *w128;
+
+    if (n != 512) {
+        cftmdl1(64, a, w32);
+        cftf081(a, w8);
+        cftf082(a + 16, w8);
+        cftf081(a + 32, w8);
+        cftf081(a + 48, w8);
+        cftmdl2(64, a + 64, w64);
+        cftf081(a + 64, w8);
+        cftf082(a + 80, w8);
+        cftf081(a + 96, w8);
+        cftf082(a + 112, w8);
+        cftmdl1(64, a + 128, w32);
+        cftf081(a + 128, w8);
+        cftf082(a + 144, w8);
+        cftf081(a + 160, w8);
+        cftf081(a + 176, w8);
+        if (isplt == 0) {
+            cftmdl2(64, a + 192, w64);
+            cftf082(a + 240, w8);
+        } else {
+            cftmdl1(64, a + 192, w32);
+            cftf081(a + 240, w8);
+        }
+        cftf081(a + 192, w8);
+        cftf082(a + 208, w8);
+        cftf081(a + 224, w8);
+        return;
+    }
+
+    w128 = w + (nw - 128);
+    cftmdl1(128, a, w64);
+    cftf161(a, w8);
+    cftf162(a + 32, w32);
+    cftf161(a + 64, w8);
+    cftf161(a + 96, w8);
+    cftmdl2(128, a + 128, w128);
+    cftf161(a + 128, w8);
+    cftf162(a + 160, w32);
+    cftf161(a + 192, w8);
+    cftf162(a + 224, w32);
+    cftmdl1(128, a + 256, w64);
+    cftf161(a + 256, w8);
+    cftf162(a + 288, w32);
+    cftf161(a + 320, w8);
+    cftf161(a + 352, w8);
+    if (isplt == 0) {
+        cftmdl2(128, a + 384, w128);
+        cftf162(a + 480, w32);
+    } else {
+        cftmdl1(128, a + 384, w64);
+        cftf161(a + 480, w8);
+    }
+    cftf161(a + 384, w8);
+    cftf162(a + 416, w32);
+    cftf161(a + 448, w8);
+}
+
+/*
+ * cftmdl1: one radix-4 butterfly pass over a[0..n-1], performed in place.
+ *
+ * a holds interleaved (real, imaginary) pairs and is treated as four
+ * quarters starting at offsets 0, m, 2m and 3m, where m = 2 * (n >> 3).
+ * Sums are stored to the first two quarters unmultiplied; differences are
+ * multiplied by table factors and stored to the last two quarters.
+ *
+ *   n - number of doubles of a covered by this pass
+ *   a - data array, overwritten with the result
+ *   w - coefficient table: w[1] is read as wn4r, and w[k..k+3] supplies
+ *       (wk1r, wk1i, wk3r, wk3i) for k = 4, 8, ...
+ */
+void cftmdl1(int n, double *a, double *w)
+{
+    int j, j0, j1, j2, j3, k, m, mh;
+    double wn4r, wk1r, wk1i, wk3r, wk3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+    mh = n >> 3;    /* n / 8 */
+    m = 2 * mh;     /* length of one quarter, in doubles */
+    j1 = m;         /* j1, j2, j3: same position in the 2nd, 3rd, 4th quarter */
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] + a[j2];         /* point 0: butterfly without any table factor */
+    x0i = a[1] + a[j2 + 1];
+    x1r = a[0] - a[j2];
+    x1i = a[1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    a[j2] = x1r - x3i;
+    a[j2 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+    wn4r = w[1];
+    k = 0;
+    for (j = 2; j < mh; j += 2) {       /* each pass handles point j and its mirror m - j */
+        k += 4;
+        wk1r = w[k];
+        wk1i = w[k + 1];
+        wk3r = w[k + 2];
+        wk3i = w[k + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] + a[j2];
+        x0i = a[j + 1] + a[j2 + 1];
+        x1r = a[j] - a[j2];
+        x1i = a[j + 1] - a[j2 + 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        a[j] = x0r + x2r;
+        a[j + 1] = x0i + x2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1r * x0r - wk1i * x0i;        /* complex multiply by (wk1r, wk1i) */
+        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3r * x0r + wk3i * x0i;        /* complex multiply by the conjugate of (wk3r, wk3i) */
+        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
+        j0 = m - j;                             /* mirrored point, counted back from the quarter's end */
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] + a[j2];
+        x0i = a[j0 + 1] + a[j2 + 1];
+        x1r = a[j0] - a[j2];
+        x1i = a[j0 + 1] - a[j2 + 1];
+        x2r = a[j1] + a[j3];
+        x2i = a[j1 + 1] + a[j3 + 1];
+        x3r = a[j1] - a[j3];
+        x3i = a[j1 + 1] - a[j3 + 1];
+        a[j0] = x0r + x2r;
+        a[j0 + 1] = x0i + x2i;
+        a[j1] = x0r - x2r;
+        a[j1 + 1] = x0i - x2i;
+        x0r = x1r - x3i;
+        x0i = x1i + x3r;
+        a[j2] = wk1i * x0r - wk1r * x0i;        /* same factors with the r and i roles swapped */
+        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
+        x0r = x1r + x3i;
+        x0i = x1i - x3r;
+        a[j3] = wk3i * x0r + wk3r * x0i;
+        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
+    }
+    j0 = mh;                            /* centre point of the quarter */
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0] + a[j2];
+    x0i = a[j0 + 1] + a[j2 + 1];
+    x1r = a[j0] - a[j2];
+    x1i = a[j0 + 1] - a[j2 + 1];
+    x2r = a[j1] + a[j3];
+    x2i = a[j1 + 1] + a[j3 + 1];
+    x3r = a[j1] - a[j3];
+    x3i = a[j1 + 1] - a[j3 + 1];
+    a[j0] = x0r + x2r;
+    a[j0 + 1] = x0i + x2i;
+    a[j1] = x0r - x2r;
+    a[j1 + 1] = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j2] = wn4r * (x0r - x0i);         /* centre point needs only wn4r */
+    a[j2 + 1] = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j3] = -wn4r * (x0r + x0i);
+    a[j3 + 1] = -wn4r * (x0i - x0r);
+}
+
+/*
+ * cftmdl2: one radix-4 butterfly pass over a[0..n-1], performed in place;
+ * companion of cftmdl1 with a different input combination.
+ *
+ * a is treated as four quarters starting at offsets 0, m, 2m and 3m, where
+ * m = 2 * (n >> 3).  Unlike cftmdl1, the inputs are first combined
+ * crosswise (the real part of one quarter with the imaginary part of the
+ * quarter two steps away) and every output is a sum or difference of two
+ * table-scaled products.
+ *
+ *   n - number of doubles of a covered by this pass
+ *   a - data array, overwritten with the result
+ *   w - coefficient table: w[1] is read as wn4r; entries are read forwards
+ *       from w[4] and backwards from w[2 * m - 4]; w[m], w[m + 1] are used
+ *       for the centre point
+ */
+void cftmdl2(int n, double *a, double *w)
+{
+    int j, j0, j1, j2, j3, k, kr, m, mh;
+    double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
+    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
+
+    mh = n >> 3;    /* n / 8 */
+    m = 2 * mh;     /* length of one quarter, in doubles */
+    wn4r = w[1];
+    j1 = m;         /* j1, j2, j3: same position in the 2nd, 3rd, 4th quarter */
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[0] - a[j2 + 1];     /* point 0: only wn4r is needed */
+    x0i = a[1] + a[j2];
+    x1r = a[0] + a[j2 + 1];
+    x1i = a[1] - a[j2];
+    x2r = a[j1] - a[j3 + 1];
+    x2i = a[j1 + 1] + a[j3];
+    x3r = a[j1] + a[j3 + 1];
+    x3i = a[j1 + 1] - a[j3];
+    y0r = wn4r * (x2r - x2i);
+    y0i = wn4r * (x2i + x2r);
+    a[0] = x0r + y0r;
+    a[1] = x0i + y0i;
+    a[j1] = x0r - y0r;
+    a[j1 + 1] = x0i - y0i;
+    y0r = wn4r * (x3r - x3i);
+    y0i = wn4r * (x3i + x3r);
+    a[j2] = x1r - y0i;
+    a[j2 + 1] = x1i + y0r;
+    a[j3] = x1r + y0i;
+    a[j3 + 1] = x1i - y0r;
+    k = 0;
+    kr = 2 * m;                 /* kr walks the table backwards while k walks forwards */
+    for (j = 2; j < mh; j += 2) {       /* each pass handles point j and its mirror m - j */
+        k += 4;
+        wk1r = w[k];
+        wk1i = w[k + 1];
+        wk3r = w[k + 2];
+        wk3i = w[k + 3];
+        kr -= 4;
+        wd1i = w[kr];           /* note the order: the i value is stored before the r value */
+        wd1r = w[kr + 1];
+        wd3i = w[kr + 2];
+        wd3r = w[kr + 3];
+        j1 = j + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j] - a[j2 + 1];
+        x0i = a[j + 1] + a[j2];
+        x1r = a[j] + a[j2 + 1];
+        x1i = a[j + 1] - a[j2];
+        x2r = a[j1] - a[j3 + 1];
+        x2i = a[j1 + 1] + a[j3];
+        x3r = a[j1] + a[j3 + 1];
+        x3i = a[j1 + 1] - a[j3];
+        y0r = wk1r * x0r - wk1i * x0i;  /* complex multiply by (wk1r, wk1i) */
+        y0i = wk1r * x0i + wk1i * x0r;
+        y2r = wd1r * x2r - wd1i * x2i;
+        y2i = wd1r * x2i + wd1i * x2r;
+        a[j] = y0r + y2r;
+        a[j + 1] = y0i + y2i;
+        a[j1] = y0r - y2r;
+        a[j1 + 1] = y0i - y2i;
+        y0r = wk3r * x1r + wk3i * x1i;  /* complex multiply by the conjugate of (wk3r, wk3i) */
+        y0i = wk3r * x1i - wk3i * x1r;
+        y2r = wd3r * x3r + wd3i * x3i;
+        y2i = wd3r * x3i - wd3i * x3r;
+        a[j2] = y0r + y2r;
+        a[j2 + 1] = y0i + y2i;
+        a[j3] = y0r - y2r;
+        a[j3 + 1] = y0i - y2i;
+        j0 = m - j;                     /* mirrored point: wk* and wd* trade places, r and i swap */
+        j1 = j0 + m;
+        j2 = j1 + m;
+        j3 = j2 + m;
+        x0r = a[j0] - a[j2 + 1];
+        x0i = a[j0 + 1] + a[j2];
+        x1r = a[j0] + a[j2 + 1];
+        x1i = a[j0 + 1] - a[j2];
+        x2r = a[j1] - a[j3 + 1];
+        x2i = a[j1 + 1] + a[j3];
+        x3r = a[j1] + a[j3 + 1];
+        x3i = a[j1 + 1] - a[j3];
+        y0r = wd1i * x0r - wd1r * x0i;
+        y0i = wd1i * x0i + wd1r * x0r;
+        y2r = wk1i * x2r - wk1r * x2i;
+        y2i = wk1i * x2i + wk1r * x2r;
+        a[j0] = y0r + y2r;
+        a[j0 + 1] = y0i + y2i;
+        a[j1] = y0r - y2r;
+        a[j1 + 1] = y0i - y2i;
+        y0r = wd3i * x1r + wd3r * x1i;
+        y0i = wd3i * x1i - wd3r * x1r;
+        y2r = wk3i * x3r + wk3r * x3i;
+        y2i = wk3i * x3i - wk3r * x3r;
+        a[j2] = y0r + y2r;
+        a[j2 + 1] = y0i + y2i;
+        a[j3] = y0r - y2r;
+        a[j3 + 1] = y0i - y2i;
+    }
+    wk1r = w[m];                /* centre point uses the single table pair at w[m], w[m + 1] */
+    wk1i = w[m + 1];
+    j0 = mh;
+    j1 = j0 + m;
+    j2 = j1 + m;
+    j3 = j2 + m;
+    x0r = a[j0] - a[j2 + 1];
+    x0i = a[j0 + 1] + a[j2];
+    x1r = a[j0] + a[j2 + 1];
+    x1i = a[j0 + 1] - a[j2];
+    x2r = a[j1] - a[j3 + 1];
+    x2i = a[j1 + 1] + a[j3];
+    x3r = a[j1] + a[j3 + 1];
+    x3i = a[j1 + 1] - a[j3];
+    y0r = wk1r * x0r - wk1i * x0i;
+    y0i = wk1r * x0i + wk1i * x0r;
+    y2r = wk1i * x2r - wk1r * x2i;
+    y2i = wk1i * x2i + wk1r * x2r;
+    a[j0] = y0r + y2r;
+    a[j0 + 1] = y0i + y2i;
+    a[j1] = y0r - y2r;
+    a[j1 + 1] = y0i - y2i;
+    y0r = wk1i * x1r - wk1r * x1i;
+    y0i = wk1i * x1i + wk1r * x1r;
+    y2r = wk1r * x3r - wk1i * x3i;
+    y2i = wk1r * x3i + wk1i * x3r;
+    a[j2] = y0r - y2r;          /* here the difference goes to j2 and the sum to j3 */
+    a[j2 + 1] = y0i - y2i;
+    a[j3] = y0r + y2r;
+    a[j3 + 1] = y0i + y2i;
+}
+
+
+/*
+ * cftfx41: run four fixed-size kernels over consecutive sub-blocks of a.
+ *
+ * n == 128 uses the 32-double kernels on a[0], a[32], a[64], a[96] in the
+ * order cftf161, cftf162, cftf161, cftf161.  Any other n uses the
+ * 16-double kernels on a[0], a[16], a[32], a[48] in the order cftf081,
+ * cftf082, cftf081, cftf081.
+ */
+void cftfx41(int n, double *a, int nw, double *w)
+{
+    void cftf161(double *a, double *w);
+    void cftf162(double *a, double *w);
+    void cftf081(double *a, double *w);
+    void cftf082(double *a, double *w);
+    double *w8 = w + (nw - 8);  /* table position used by all kernels but cftf162 */
+
+    if (n != 128) {
+        cftf081(a, w8);
+        cftf082(a + 16, w8);
+        cftf081(a + 32, w8);
+        cftf081(a + 48, w8);
+        return;
+    }
+    cftf161(a, w8);
+    cftf162(a + 32, w + (nw - 32));
+    cftf161(a + 64, w8);
+    cftf161(a + 96, w8);
+}
+
+/*
+ * cftf161: fixed-size kernel that transforms a[0..31] in place (sixteen
+ * interleaved real/imaginary pairs).
+ *
+ * Stage 1 runs four radix-4 butterflies on inputs spaced 8 doubles apart
+ * and keeps the sixteen results in y0..y15; stage 2 combines those in
+ * groups of four and writes a[24..31], a[16..23], a[8..15], a[0..7].
+ *
+ *   a - 32 doubles, overwritten with the result
+ *   w - coefficient table; only w[1] (wn4r) and w[2], w[3] (wk1r, wk1i)
+ *       are read
+ */
+void cftf161(double *a, double *w)
+{
+    double wn4r, wk1r, wk1i,
+        x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
+        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
+        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+
+    wn4r = w[1];
+    wk1r = w[2];
+    wk1i = w[3];
+    x0r = a[0] + a[16];         /* stage 1, inputs a[0], a[8], a[16], a[24] -> y0, y4, y8, y12 */
+    x0i = a[1] + a[17];
+    x1r = a[0] - a[16];
+    x1i = a[1] - a[17];
+    x2r = a[8] + a[24];
+    x2i = a[9] + a[25];
+    x3r = a[8] - a[24];
+    x3i = a[9] - a[25];
+    y0r = x0r + x2r;
+    y0i = x0i + x2i;
+    y4r = x0r - x2r;
+    y4i = x0i - x2i;
+    y8r = x1r - x3i;
+    y8i = x1i + x3r;
+    y12r = x1r + x3i;
+    y12i = x1i - x3r;
+    x0r = a[2] + a[18];         /* inputs a[2], a[10], a[18], a[26] -> y1, y5, y9, y13 */
+    x0i = a[3] + a[19];
+    x1r = a[2] - a[18];
+    x1i = a[3] - a[19];
+    x2r = a[10] + a[26];
+    x2i = a[11] + a[27];
+    x3r = a[10] - a[26];
+    x3i = a[11] - a[27];
+    y1r = x0r + x2r;
+    y1i = x0i + x2i;
+    y5r = x0r - x2r;
+    y5i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y9r = wk1r * x0r - wk1i * x0i;      /* complex multiply by (wk1r, wk1i) */
+    y9i = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y13r = wk1i * x0r - wk1r * x0i;     /* same pair with the r and i roles swapped */
+    y13i = wk1i * x0i + wk1r * x0r;
+    x0r = a[4] + a[20];         /* inputs a[4], a[12], a[20], a[28] -> y2, y6, y10, y14 */
+    x0i = a[5] + a[21];
+    x1r = a[4] - a[20];
+    x1i = a[5] - a[21];
+    x2r = a[12] + a[28];
+    x2i = a[13] + a[29];
+    x3r = a[12] - a[28];
+    x3i = a[13] - a[29];
+    y2r = x0r + x2r;
+    y2i = x0i + x2i;
+    y6r = x0r - x2r;
+    y6i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y10r = wn4r * (x0r - x0i);
+    y10i = wn4r * (x0i + x0r);
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y14r = wn4r * (x0r + x0i);
+    y14i = wn4r * (x0i - x0r);
+    x0r = a[6] + a[22];         /* inputs a[6], a[14], a[22], a[30] -> y3, y7, y11, y15 */
+    x0i = a[7] + a[23];
+    x1r = a[6] - a[22];
+    x1i = a[7] - a[23];
+    x2r = a[14] + a[30];
+    x2i = a[15] + a[31];
+    x3r = a[14] - a[30];
+    x3i = a[15] - a[31];
+    y3r = x0r + x2r;
+    y3i = x0i + x2i;
+    y7r = x0r - x2r;
+    y7i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    y11r = wk1i * x0r - wk1r * x0i;
+    y11i = wk1i * x0i + wk1r * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    y15r = wk1r * x0r - wk1i * x0i;
+    y15i = wk1r * x0i + wk1i * x0r;
+    x0r = y12r - y14r;          /* stage 2: y12..y15 -> a[24..31] */
+    x0i = y12i - y14i;
+    x1r = y12r + y14r;
+    x1i = y12i + y14i;
+    x2r = y13r - y15r;
+    x2i = y13i - y15i;
+    x3r = y13r + y15r;
+    x3i = y13i + y15i;
+    a[24] = x0r + x2r;
+    a[25] = x0i + x2i;
+    a[26] = x0r - x2r;
+    a[27] = x0i - x2i;
+    a[28] = x1r - x3i;
+    a[29] = x1i + x3r;
+    a[30] = x1r + x3i;
+    a[31] = x1i - x3r;
+    x0r = y8r + y10r;           /* y8..y11 -> a[16..23] */
+    x0i = y8i + y10i;
+    x1r = y8r - y10r;
+    x1i = y8i - y10i;
+    x2r = y9r + y11r;
+    x2i = y9i + y11i;
+    x3r = y9r - y11r;
+    x3i = y9i - y11i;
+    a[16] = x0r + x2r;
+    a[17] = x0i + x2i;
+    a[18] = x0r - x2r;
+    a[19] = x0i - x2i;
+    a[20] = x1r - x3i;
+    a[21] = x1i + x3r;
+    a[22] = x1r + x3i;
+    a[23] = x1i - x3r;
+    x0r = y5r - y7i;            /* y4..y7 -> a[8..15] */
+    x0i = y5i + y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    x0r = y5r + y7i;
+    x0i = y5i - y7r;
+    x3r = wn4r * (x0r - x0i);
+    x3i = wn4r * (x0i + x0r);
+    x0r = y4r - y6i;
+    x0i = y4i + y6r;
+    x1r = y4r + y6i;
+    x1i = y4i - y6r;
+    a[8] = x0r + x2r;
+    a[9] = x0i + x2i;
+    a[10] = x0r - x2r;
+    a[11] = x0i - x2i;
+    a[12] = x1r - x3i;
+    a[13] = x1i + x3r;
+    a[14] = x1r + x3i;
+    a[15] = x1i - x3r;
+    x0r = y0r + y2r;            /* y0..y3 -> a[0..7] */
+    x0i = y0i + y2i;
+    x1r = y0r - y2r;
+    x1i = y0i - y2i;
+    x2r = y1r + y3r;
+    x2i = y1i + y3i;
+    x3r = y1r - y3r;
+    x3i = y1i - y3i;
+    a[0] = x0r + x2r;
+    a[1] = x0i + x2i;
+    a[2] = x0r - x2r;
+    a[3] = x0i - x2i;
+    a[4] = x1r - x3i;
+    a[5] = x1i + x3r;
+    a[6] = x1r + x3i;
+    a[7] = x1i - x3r;
+}
+
+/*
+ * cftf162: fixed-size kernel that transforms a[0..31] in place (sixteen
+ * interleaved real/imaginary pairs); companion of cftf161 with crosswise
+ * input combinations and more table factors.
+ *
+ * Stage 1 fills y0..y15 from inputs that pair a[i] with a[i + 17] or
+ * a[i + 15] (real against imaginary, 16 doubles apart); stage 2 combines
+ * them in groups of four and writes a[0..31] from low to high index.
+ *
+ *   a - 32 doubles, overwritten with the result
+ *   w - coefficient table; w[1] and w[4]..w[9] are read, with w[7]
+ *       negated on load
+ */
+void cftf162(double *a, double *w)
+{
+    double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
+        x0r, x0i, x1r, x1i, x2r, x2i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
+        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
+        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
+
+    wn4r = w[1];
+    wk1r = w[4];
+    wk1i = w[5];
+    wk3r = w[6];
+    wk3i = -w[7];               /* sign flipped on load */
+    wk2r = w[8];
+    wk2i = w[9];
+    x1r = a[0] - a[17];         /* stage 1: a[0], a[8] with a[16], a[24] -> y0, y4 */
+    x1i = a[1] + a[16];
+    x0r = a[8] - a[25];
+    x0i = a[9] + a[24];
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    y0r = x1r + x2r;
+    y0i = x1i + x2i;
+    y4r = x1r - x2r;
+    y4i = x1i - x2i;
+    x1r = a[0] + a[17];         /* same inputs, opposite signs -> y8, y12 */
+    x1i = a[1] - a[16];
+    x0r = a[8] + a[25];
+    x0i = a[9] - a[24];
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    y8r = x1r - x2i;
+    y8i = x1i + x2r;
+    y12r = x1r + x2i;
+    y12i = x1i - x2r;
+    x0r = a[2] - a[19];         /* a[2], a[10] with a[18], a[26] -> y1, y5 */
+    x0i = a[3] + a[18];
+    x1r = wk1r * x0r - wk1i * x0i;
+    x1i = wk1r * x0i + wk1i * x0r;
+    x0r = a[10] - a[27];
+    x0i = a[11] + a[26];
+    x2r = wk3i * x0r - wk3r * x0i;
+    x2i = wk3i * x0i + wk3r * x0r;
+    y1r = x1r + x2r;
+    y1i = x1i + x2i;
+    y5r = x1r - x2r;
+    y5i = x1i - x2i;
+    x0r = a[2] + a[19];         /* same inputs, opposite signs -> y9, y13 */
+    x0i = a[3] - a[18];
+    x1r = wk3r * x0r - wk3i * x0i;
+    x1i = wk3r * x0i + wk3i * x0r;
+    x0r = a[10] + a[27];
+    x0i = a[11] - a[26];
+    x2r = wk1r * x0r + wk1i * x0i;
+    x2i = wk1r * x0i - wk1i * x0r;
+    y9r = x1r - x2r;
+    y9i = x1i - x2i;
+    y13r = x1r + x2r;
+    y13i = x1i + x2i;
+    x0r = a[4] - a[21];         /* a[4], a[12] with a[20], a[28] -> y2, y6 */
+    x0i = a[5] + a[20];
+    x1r = wk2r * x0r - wk2i * x0i;
+    x1i = wk2r * x0i + wk2i * x0r;
+    x0r = a[12] - a[29];
+    x0i = a[13] + a[28];
+    x2r = wk2i * x0r - wk2r * x0i;
+    x2i = wk2i * x0i + wk2r * x0r;
+    y2r = x1r + x2r;
+    y2i = x1i + x2i;
+    y6r = x1r - x2r;
+    y6i = x1i - x2i;
+    x0r = a[4] + a[21];         /* same inputs, opposite signs -> y10, y14 */
+    x0i = a[5] - a[20];
+    x1r = wk2i * x0r - wk2r * x0i;
+    x1i = wk2i * x0i + wk2r * x0r;
+    x0r = a[12] + a[29];
+    x0i = a[13] - a[28];
+    x2r = wk2r * x0r - wk2i * x0i;
+    x2i = wk2r * x0i + wk2i * x0r;
+    y10r = x1r - x2r;
+    y10i = x1i - x2i;
+    y14r = x1r + x2r;
+    y14i = x1i + x2i;
+    x0r = a[6] - a[23];         /* a[6], a[14] with a[22], a[30] -> y3, y7 */
+    x0i = a[7] + a[22];
+    x1r = wk3r * x0r - wk3i * x0i;
+    x1i = wk3r * x0i + wk3i * x0r;
+    x0r = a[14] - a[31];
+    x0i = a[15] + a[30];
+    x2r = wk1i * x0r - wk1r * x0i;
+    x2i = wk1i * x0i + wk1r * x0r;
+    y3r = x1r + x2r;
+    y3i = x1i + x2i;
+    y7r = x1r - x2r;
+    y7i = x1i - x2i;
+    x0r = a[6] + a[23];         /* same inputs, opposite signs -> y11, y15 */
+    x0i = a[7] - a[22];
+    x1r = wk1i * x0r + wk1r * x0i;
+    x1i = wk1i * x0i - wk1r * x0r;
+    x0r = a[14] + a[31];
+    x0i = a[15] - a[30];
+    x2r = wk3i * x0r - wk3r * x0i;
+    x2i = wk3i * x0i + wk3r * x0r;
+    y11r = x1r + x2r;
+    y11i = x1i + x2i;
+    y15r = x1r - x2r;
+    y15i = x1i - x2i;
+    x1r = y0r + y2r;            /* stage 2: y0..y3 -> a[0..7] */
+    x1i = y0i + y2i;
+    x2r = y1r + y3r;
+    x2i = y1i + y3i;
+    a[0] = x1r + x2r;
+    a[1] = x1i + x2i;
+    a[2] = x1r - x2r;
+    a[3] = x1i - x2i;
+    x1r = y0r - y2r;
+    x1i = y0i - y2i;
+    x2r = y1r - y3r;
+    x2i = y1i - y3i;
+    a[4] = x1r - x2i;
+    a[5] = x1i + x2r;
+    a[6] = x1r + x2i;
+    a[7] = x1i - x2r;
+    x1r = y4r - y6i;            /* y4..y7 -> a[8..15] */
+    x1i = y4i + y6r;
+    x0r = y5r - y7i;
+    x0i = y5i + y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[8] = x1r + x2r;
+    a[9] = x1i + x2i;
+    a[10] = x1r - x2r;
+    a[11] = x1i - x2i;
+    x1r = y4r + y6i;
+    x1i = y4i - y6r;
+    x0r = y5r + y7i;
+    x0i = y5i - y7r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[12] = x1r - x2i;
+    a[13] = x1i + x2r;
+    a[14] = x1r + x2i;
+    a[15] = x1i - x2r;
+    x1r = y8r + y10r;           /* y8..y11 -> a[16..23] */
+    x1i = y8i + y10i;
+    x2r = y9r - y11r;
+    x2i = y9i - y11i;
+    a[16] = x1r + x2r;
+    a[17] = x1i + x2i;
+    a[18] = x1r - x2r;
+    a[19] = x1i - x2i;
+    x1r = y8r - y10r;
+    x1i = y8i - y10i;
+    x2r = y9r + y11r;
+    x2i = y9i + y11i;
+    a[20] = x1r - x2i;
+    a[21] = x1i + x2r;
+    a[22] = x1r + x2i;
+    a[23] = x1i - x2r;
+    x1r = y12r - y14i;          /* y12..y15 -> a[24..31] */
+    x1i = y12i + y14r;
+    x0r = y13r + y15i;
+    x0i = y13i - y15r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[24] = x1r + x2r;
+    a[25] = x1i + x2i;
+    a[26] = x1r - x2r;
+    a[27] = x1i - x2i;
+    x1r = y12r + y14i;
+    x1i = y12i - y14r;
+    x0r = y13r - y15i;
+    x0i = y13i + y15r;
+    x2r = wn4r * (x0r - x0i);
+    x2i = wn4r * (x0i + x0r);
+    a[28] = x1r - x2i;
+    a[29] = x1i + x2r;
+    a[30] = x1r + x2i;
+    a[31] = x1i - x2r;
+}
+
+
+void cftf081(double *a, double *w)
+{
+    double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+    /* In-place pass over a[0..15]; a[2k], a[2k+1] are combined as a (re, im) pair. */
+    wn4r = w[1];  /* the only element of w[] read here */
+    x0r = a[0] + a[8];  /* pairs 0, 2, 4, 6 -> y0..y3 */
+    x0i = a[1] + a[9];
+    x1r = a[0] - a[8];
+    x1i = a[1] - a[9];
+    x2r = a[4] + a[12];
+    x2i = a[5] + a[13];
+    x3r = a[4] - a[12];
+    x3i = a[5] - a[13];
+    y0r = x0r + x2r;
+    y0i = x0i + x2i;
+    y2r = x0r - x2r;
+    y2i = x0i - x2i;
+    y1r = x1r - x3i;
+    y1i = x1i + x3r;
+    y3r = x1r + x3i;
+    y3i = x1i - x3r;
+    x0r = a[2] + a[10];  /* pairs 1, 3, 5, 7 -> y4..y7 */
+    x0i = a[3] + a[11];
+    x1r = a[2] - a[10];
+    x1i = a[3] - a[11];
+    x2r = a[6] + a[14];
+    x2i = a[7] + a[15];
+    x3r = a[6] - a[14];
+    x3i = a[7] - a[15];
+    y4r = x0r + x2r;
+    y4i = x0i + x2i;
+    y6r = x0r - x2r;
+    y6i = x0i - x2i;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    x2r = x1r + x3i;
+    x2i = x1i - x3r;
+    y5r = wn4r * (x0r - x0i);  /* y5 and y7 are the only terms scaled by wn4r */
+    y5i = wn4r * (x0r + x0i);
+    y7r = wn4r * (x2r - x2i);
+    y7i = wn4r * (x2r + x2i);
+    a[8] = y1r + y5r;  /* write-back; every a[] input has been read by now */
+    a[9] = y1i + y5i;
+    a[10] = y1r - y5r;
+    a[11] = y1i - y5i;
+    a[12] = y3r - y7i;
+    a[13] = y3i + y7r;
+    a[14] = y3r + y7i;
+    a[15] = y3i - y7r;
+    a[0] = y0r + y4r;
+    a[1] = y0i + y4i;
+    a[2] = y0r - y4r;
+    a[3] = y0i - y4i;
+    a[4] = y2r - y6i;
+    a[5] = y2i + y6r;
+    a[6] = y2r + y6i;
+    a[7] = y2i - y6r;
+}
+
+
+void cftf082(double *a, double *w)
+{
+    double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
+        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
+        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
+    /* In-place pass over a[0..15] (interleaved re/im) using w[1], w[2] and w[3]. */
+    wn4r = w[1];
+    wk1r = w[2];
+    wk1i = w[3];
+    y0r = a[0] - a[9];  /* y0..y7: inputs combined, then scaled by the w[] factors */
+    y0i = a[1] + a[8];
+    y1r = a[0] + a[9];
+    y1i = a[1] - a[8];
+    x0r = a[4] - a[13];
+    x0i = a[5] + a[12];
+    y2r = wn4r * (x0r - x0i);
+    y2i = wn4r * (x0i + x0r);
+    x0r = a[4] + a[13];
+    x0i = a[5] - a[12];
+    y3r = wn4r * (x0r - x0i);
+    y3i = wn4r * (x0i + x0r);
+    x0r = a[2] - a[11];
+    x0i = a[3] + a[10];
+    y4r = wk1r * x0r - wk1i * x0i;
+    y4i = wk1r * x0i + wk1i * x0r;
+    x0r = a[2] + a[11];
+    x0i = a[3] - a[10];
+    y5r = wk1i * x0r - wk1r * x0i;
+    y5i = wk1i * x0i + wk1r * x0r;
+    x0r = a[6] - a[15];
+    x0i = a[7] + a[14];
+    y6r = wk1i * x0r - wk1r * x0i;
+    y6i = wk1i * x0i + wk1r * x0r;
+    x0r = a[6] + a[15];
+    x0i = a[7] - a[14];
+    y7r = wk1r * x0r - wk1i * x0i;
+    y7i = wk1r * x0i + wk1i * x0r;
+    x0r = y0r + y2r;  /* recombine y0..y7 and store the results back into a[] */
+    x0i = y0i + y2i;
+    x1r = y4r + y6r;
+    x1i = y4i + y6i;
+    a[0] = x0r + x1r;
+    a[1] = x0i + x1i;
+    a[2] = x0r - x1r;
+    a[3] = x0i - x1i;
+    x0r = y0r - y2r;
+    x0i = y0i - y2i;
+    x1r = y4r - y6r;
+    x1i = y4i - y6i;
+    a[4] = x0r - x1i;
+    a[5] = x0i + x1r;
+    a[6] = x0r + x1i;
+    a[7] = x0i - x1r;
+    x0r = y1r - y3i;
+    x0i = y1i + y3r;
+    x1r = y5r - y7r;
+    x1i = y5i - y7i;
+    a[8] = x0r + x1r;
+    a[9] = x0i + x1i;
+    a[10] = x0r - x1r;
+    a[11] = x0i - x1i;
+    x0r = y1r + y3i;
+    x0i = y1i - y3r;
+    x1r = y5r + y7r;
+    x1i = y5i + y7i;
+    a[12] = x0r - x1i;
+    a[13] = x0i + x1r;
+    a[14] = x0r + x1i;
+    a[15] = x0i - x1r;
+}
+
+
+void cftf040(double *a)
+{
+    /*
+     * 4-point butterfly over z0..z3, stored interleaved as (re, im) pairs in
+     * a[0..7]. Sums and differences of (z0, z2) and (z1, z3) are formed first;
+     * the z1 - z3 difference enters output 1 times +i and output 3 times -i.
+     */
+    const double sum02r = a[0] + a[4], sum02i = a[1] + a[5];
+    const double dif02r = a[0] - a[4], dif02i = a[1] - a[5];
+    const double sum13r = a[2] + a[6], sum13i = a[3] + a[7];
+    const double dif13r = a[2] - a[6], dif13i = a[3] - a[7];
+
+    a[0] = sum02r + sum13r;
+    a[1] = sum02i + sum13i;
+    a[2] = dif02r - dif13i;
+    a[3] = dif02i + dif13r;
+    a[4] = sum02r - sum13r;
+    a[5] = sum02i - sum13i;
+    a[6] = dif02r + dif13i;
+    a[7] = dif02i - dif13r;
+}
+
+
+void cftb040(double *a)
+{
+    /*
+     * 4-point butterfly over z0..z3, stored interleaved as (re, im) pairs in
+     * a[0..7]. Sums and differences of (z0, z2) and (z1, z3) are formed first;
+     * the z1 - z3 difference enters output 1 times -i and output 3 times +i.
+     */
+    const double sum02r = a[0] + a[4], sum02i = a[1] + a[5];
+    const double dif02r = a[0] - a[4], dif02i = a[1] - a[5];
+    const double sum13r = a[2] + a[6], sum13i = a[3] + a[7];
+    const double dif13r = a[2] - a[6], dif13i = a[3] - a[7];
+
+    a[0] = sum02r + sum13r;
+    a[1] = sum02i + sum13i;
+    a[2] = dif02r + dif13i;
+    a[3] = dif02i - dif13r;
+    a[4] = sum02r - sum13r;
+    a[5] = sum02i - sum13i;
+    a[6] = dif02r - dif13i;
+    a[7] = dif02i + dif13r;
+}
+
+
+void cftx020(double *a)
+{
+    /* 2-point butterfly: (z0, z1) becomes (z0 + z1, z0 - z1), in place. */
+    const double z0r = a[0], z0i = a[1];
+    const double z1r = a[2], z1i = a[3];
+
+    a[0] = z0r + z1r;
+    a[1] = z0i + z1i;
+    a[2] = z0r - z1r;
+    a[3] = z0i - z1i;
+}
+
+
+void rftfsub(int n, double *a, int nc, double *c)
+{
+    /* For each j = 2, 4, ... below n / 2, updates the pairs at j and n - j. */
+    const int half = n >> 1;
+    const int step = 2 * nc / half;
+    int lo, tw;
+
+    for (lo = 2, tw = step; lo < half; lo += 2, tw += step) {
+        double *p = a + lo;        /* a[j], a[j + 1] */
+        double *q = a + (n - lo);  /* a[k], a[k + 1] with k = n - j */
+        const double cr = 0.5 - c[nc - tw];
+        const double ci = c[tw];
+        const double dr = p[0] - q[0];
+        const double di = p[1] + q[1];
+        const double tr = cr * dr - ci * di;
+        const double ti = cr * di + ci * dr;
+
+        p[0] -= tr;
+        p[1] -= ti;
+        q[0] += tr;
+        q[1] -= ti;
+    }
+}
+
+
+void rftbsub(int n, double *a, int nc, double *c)
+{
+    /* For each j = 2, 4, ... below n / 2, updates the pairs at j and n - j. */
+    const int half = n >> 1;
+    const int step = 2 * nc / half;
+    int lo, tw;
+
+    for (lo = 2, tw = step; lo < half; lo += 2, tw += step) {
+        double *p = a + lo;        /* a[j], a[j + 1] */
+        double *q = a + (n - lo);  /* a[k], a[k + 1] with k = n - j */
+        const double cr = 0.5 - c[nc - tw];
+        const double ci = c[tw];
+        const double dr = p[0] - q[0];
+        const double di = p[1] + q[1];
+        const double tr = cr * dr + ci * di;
+        const double ti = cr * di - ci * dr;
+
+        p[0] -= tr;
+        p[1] -= ti;
+        q[0] += tr;
+        q[1] -= ti;
+    }
+}
+
+
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc
new file mode 100644
index 0000000..7223337
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.cc
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Stack trace related stuff is from kaldi.
+ * Refer to
+ * https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-error.cc
+ */
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+#ifdef KNF_HAVE_EXECINFO_H
+#include <execinfo.h>  // To get stack trace in error messages.
+#ifdef KNF_HAVE_CXXABI_H
+#include <cxxabi.h>  // For name demangling.
+// Useful to decode the stack trace, but only used if we have execinfo.h
+#endif  // KNF_HAVE_CXXABI_H
+#endif  // KNF_HAVE_EXECINFO_H
+
+#include <stdlib.h>
+
+#include <ctime>
+#include <iomanip>
+#include <string>
+
+namespace knf {
+
+std::string GetDateTimeStr() {  // current local time, "yyyy-mm-dd hh:mm:ss"
+  const std::time_t now = std::time(nullptr);
+  const std::tm local_now = *std::localtime(&now);  // copy the static result
+  std::ostringstream formatted;
+  formatted << std::put_time(&local_now, "%F %T");
+  return formatted.str();
+}
+
+// Finds the mangled symbol embedded in one backtrace line.
+// On success returns true with [*begin, *end) delimiting the symbol.
+// Returns false with *begin == npos (and *end untouched) if no start is found,
+// or with *end == npos if the symbol has no terminating ' ' or '+'.
+static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
+                              std::size_t *end) {
+  // A symbol starts at the first '_' that directly follows ' ' or '('.
+  std::size_t pos = trace_name.find('_', 1);
+  while (pos != std::string::npos) {
+    const char prev = trace_name[pos - 1];
+    if (prev == ' ' || prev == '(') break;
+    pos = trace_name.find('_', pos + 1);
+  }
+  *begin = pos;
+  if (pos == std::string::npos) return false;
+  // The symbol extends up to the next ' ' or '+'.
+  *end = trace_name.find_first_of(" +", pos);
+  return *end != std::string::npos;
+}
+
+#ifdef KNF_HAVE_EXECINFO_H
+static std::string Demangle(const std::string &trace_name) {
+#ifdef KNF_HAVE_CXXABI_H
+  // Replaces the mangled C++ symbol inside one backtrace line with its
+  // demangled form. The supported layouts are:
+  //
+  // Linux:
+  //   ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
+  //
+  // Mac:
+  //   0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
+  //
+  // For instance '_ZN5kaldi13UnitTestErrorEv' is turned into a readable name
+  // such as kaldi::UnitTestError. Lines without a symbol are returned as is.
+  std::size_t first = 0, last = 0;
+  if (!LocateSymbolRange(trace_name, &first, &last)) return trace_name;
+  const std::string prefix = trace_name.substr(0, first);
+  const std::string suffix = trace_name.substr(last);
+  std::string name = trace_name.substr(first, last - first);
+
+  int status = 0;
+  char *readable = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status);
+  if (readable != nullptr && status == 0) {
+    name = readable;
+    free(readable);
+  }
+  return prefix + name + suffix;
+#else   // !KNF_HAVE_CXXABI_H
+  return trace_name;  // no demangler available: pass the line through
+#endif  // KNF_HAVE_CXXABI_H
+}
+#endif  // KNF_HAVE_EXECINFO_H
+
+std::string GetStackTrace() {
+  std::string ans;
+#ifdef KNF_HAVE_EXECINFO_H
+  // Upper bound on captured frames, and on frames printed before the middle
+  // of the trace is elided (the latter must be even).
+  constexpr const std::size_t kMaxTraceSize = 50;
+  constexpr const std::size_t kMaxTracePrint = 50;
+  void *frames[kMaxTraceSize];
+  const std::size_t num_frames = backtrace(frames, kMaxTraceSize);
+  char **symbols = backtrace_symbols(frames, num_frames);
+  if (symbols == nullptr) return ans;  // no symbols: report nothing
+
+  // Appends the demangled frames [first, last), one per line.
+  auto append_frames = [&ans, symbols](std::size_t first, std::size_t last) {
+    for (std::size_t i = first; i < last; ++i) {
+      ans += Demangle(symbols[i]) + "\n";
+    }
+  };
+
+  ans += "[ Stack-Trace: ]\n";
+  if (num_frames > kMaxTracePrint) {
+    // Too long: keep the head and the tail, elide the middle.
+    const std::size_t half = kMaxTracePrint / 2;
+    append_frames(0, half);
+    ans += ".\n.\n.\n";
+    append_frames(num_frames - half, num_frames);
+    // A completely filled buffer suggests the real stack was even longer.
+    if (num_frames == kMaxTraceSize) ans += ".\n.\n.\n";
+  } else {
+    append_frames(0, num_frames);
+  }
+
+  // Only the array returned by backtrace_symbols() is freed, not the strings.
+  free(symbols);
+#endif  // KNF_HAVE_EXECINFO_H
+  return ans;
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h
new file mode 100644
index 0000000..bd21cc3
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/log.h
@@ -0,0 +1,383 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// https://github.com/k2-fsa/k2/blob/master/k2/csrc/log.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_LOG_H_
+#define KALDI_NATIVE_FBANK_CSRC_LOG_H_
+
+#include <stdio.h>
+
+#include <mutex>  // NOLINT
+#include <sstream>
+#include <string>
+
+namespace knf {
+
+#if KNF_ENABLE_CHECK
+
+#if defined(NDEBUG)
+constexpr bool kDisableDebug = true;
+#else
+constexpr bool kDisableDebug = false;
+#endif
+
+enum class LogLevel {  // a message is printed when its level >= the current level
+  kTrace = 0,
+  kDebug = 1,
+  kInfo = 2,
+  kWarning = 3,
+  kError = 4,
+  kFatal = 5,  // message + stack trace, then abort() or a thrown runtime_error
+};
+
+// They are used in KNF_LOG(xxx), so their names
+// do not follow the google c++ code style
+//
+// You can use them in the following way:
+//
+//  KNF_LOG(TRACE) << "some message";
+//  KNF_LOG(DEBUG) << "some message";
+#ifndef _MSC_VER  // NOTE(review): MSVC presumably needs the macro form below because of Windows header macros such as ERROR -- confirm
+constexpr LogLevel TRACE = LogLevel::kTrace;
+constexpr LogLevel DEBUG = LogLevel::kDebug;
+constexpr LogLevel INFO = LogLevel::kInfo;
+constexpr LogLevel WARNING = LogLevel::kWarning;
+constexpr LogLevel ERROR = LogLevel::kError;
+constexpr LogLevel FATAL = LogLevel::kFatal;
+#else  // _MSC_VER: object-like macros that expand to the same enumerators
+#define TRACE LogLevel::kTrace
+#define DEBUG LogLevel::kDebug
+#define INFO LogLevel::kInfo
+#define WARNING LogLevel::kWarning
+#define ERROR LogLevel::kError
+#define FATAL LogLevel::kFatal
+#endif  // _MSC_VER
+
+std::string GetStackTrace();
+
+/* Return the current log level.
+
+
+   If the current log level is TRACE, then all logged messages are printed out.
+
+   If the current log level is DEBUG, log messages with "TRACE" level are not
+   shown and all other levels are printed out.
+
+   Similarly, if the current log level is INFO, log message with "TRACE" and
+   "DEBUG" are not shown and all other levels are printed out.
+
+   If it is FATAL, then only FATAL messages are shown.
+ */
+inline LogLevel GetCurrentLogLevel() {  // level from $KNF_LOG_LEVEL, cached
+  static LogLevel log_level = INFO;  // used when KNF_LOG_LEVEL is unset/invalid
+  static std::once_flag init_flag;
+  std::call_once(init_flag, []() {  // the environment is consulted only once
+    const char *env_log_level = std::getenv("KNF_LOG_LEVEL");
+    if (env_log_level == nullptr) return;
+    // The value must match one of the level names exactly (case-sensitive).
+    std::string s = env_log_level;
+    if (s == "TRACE")
+      log_level = TRACE;
+    else if (s == "DEBUG")
+      log_level = DEBUG;
+    else if (s == "INFO")
+      log_level = INFO;
+    else if (s == "WARNING")
+      log_level = WARNING;
+    else if (s == "ERROR")
+      log_level = ERROR;
+    else if (s == "FATAL")
+      log_level = FATAL;
+    else  // keep the default; end the diagnostic with a newline
+      fprintf(stderr,
+              "Unknown KNF_LOG_LEVEL: %s"
+              "\nSupported values are: "
+              "TRACE, DEBUG, INFO, WARNING, ERROR, FATAL\n",
+              s.c_str());
+  });
+  return log_level;
+}
+
+// Returns true when the KNF_ABORT environment variable is set (to any value).
+// The environment is inspected only once; the answer is cached afterwards.
+inline bool EnableAbort() {
+  // C++11 guarantees that a function-local static is initialised exactly
+  // once, even when several threads race here.
+  static const bool kAbortRequested = std::getenv("KNF_ABORT") != nullptr;
+  return kAbortRequested;
+}
+
+// Writes one log record to stderr: the constructor prints the severity tag and
+// source location, operator<< appends the payload, the destructor ends it.
+class Logger {
+ public:
+  // Starts a record; it is printed only when level >= GetCurrentLogLevel().
+  Logger(const char *filename, const char *func_name, uint32_t line_num,
+         LogLevel level)
+      : filename_(filename), func_name_(func_name), line_num_(line_num),
+        level_(level), cur_level_(GetCurrentLogLevel()) {
+    switch (level) {
+      case TRACE:
+        if (cur_level_ <= TRACE) fprintf(stderr, "[T] ");
+        break;
+      case DEBUG:
+        if (cur_level_ <= DEBUG) fprintf(stderr, "[D] ");
+        break;
+      case INFO:
+        if (cur_level_ <= INFO) fprintf(stderr, "[I] ");
+        break;
+      case WARNING:
+        if (cur_level_ <= WARNING) fprintf(stderr, "[W] ");
+        break;
+      case ERROR:
+        if (cur_level_ <= ERROR) fprintf(stderr, "[E] ");
+        break;
+      case FATAL:
+        if (cur_level_ <= FATAL) fprintf(stderr, "[F] ");
+        break;
+    }
+
+    if (cur_level_ <= level_) {
+      fprintf(stderr, "%s:%u:%s ", filename, line_num, func_name);
+    }
+  }
+
+  // Ends the record. A FATAL record also dumps the stack, then aborts/throws.
+  ~Logger() noexcept(false) {
+    static constexpr const char *kErrMsg = R"(
+    Some bad things happened. Please read the above error messages and stack
+    trace. If you are using Python, the following command may be helpful:
+
+      gdb --args python /path/to/your/code.py
+
+    (You can use `gdb` to debug the code. Please consider compiling
+    a debug version of KNF.).
+
+    If you are unable to fix it, please open an issue at:
+
+      https://github.com/csukuangfj/kaldi-native-fbank/issues/new
+    )";
+    if (cur_level_ <= level_) fprintf(stderr, "\n");  // skip filtered records
+    if (level_ == FATAL) {
+      std::string stack_trace = GetStackTrace();
+      if (!stack_trace.empty()) {
+        fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
+      }
+
+      fflush(nullptr);
+
+#ifndef __ANDROID_API__
+      if (EnableAbort()) {
+        // NOTE: abort() will terminate the program immediately without
+        // printing the Python stack backtrace.
+        abort();
+      }
+
+      throw std::runtime_error(kErrMsg);
+#else
+      abort();
+#endif
+    }
+  }
+
+  const Logger &operator<<(bool b) const {
+    if (cur_level_ <= level_) {
+      fprintf(stderr, b ? "true" : "false");
+    }
+    return *this;
+  }
+
+  const Logger &operator<<(int8_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
+    return *this;
+  }
+
+  const Logger &operator<<(const char *s) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%s", s);
+    return *this;
+  }
+
+  const Logger &operator<<(int32_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
+    return *this;
+  }
+
+  const Logger &operator<<(uint32_t i) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%u", i);
+    return *this;
+  }
+
+  const Logger &operator<<(uint64_t i) const {
+    if (cur_level_ <= level_)
+      fprintf(stderr, "%llu", (long long unsigned int)i);  // NOLINT
+    return *this;
+  }
+
+  const Logger &operator<<(int64_t i) const {
+    if (cur_level_ <= level_)
+      fprintf(stderr, "%lli", (long long int)i);  // NOLINT
+    return *this;
+  }
+
+  const Logger &operator<<(float f) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%f", f);
+    return *this;
+  }
+
+  const Logger &operator<<(double d) const {
+    if (cur_level_ <= level_) fprintf(stderr, "%f", d);
+    return *this;
+  }
+
+  template <typename T>
+  const Logger &operator<<(const T &t) const {
+    // require T overloads operator<<
+    std::ostringstream os;
+    os << t;
+    return *this << os.str().c_str();
+  }
+
+  // specialization to fix compile error: `stringstream << nullptr` is ambiguous
+  const Logger &operator<<(const std::nullptr_t &null) const {
+    if (cur_level_ <= level_) *this << "(null)";
+    return *this;
+  }
+
+ private:
+  const char *filename_;
+  const char *func_name_;
+  uint32_t line_num_;
+  LogLevel level_;
+  LogLevel cur_level_;
+};
+#endif  // KNF_ENABLE_CHECK
+
+// Helper for the KNF_* macros: swallows the Logger expression when checks are
+// enabled, and swallows every streamed operand when they are disabled.
+class Voidifier {
+ public:
+#if KNF_ENABLE_CHECK
+  void operator&(const Logger &) const {}
+#endif
+};
+#if !KNF_ENABLE_CHECK  // same test as above, so =0 also selects this branch
+template <typename T>
+const Voidifier &operator<<(const Voidifier &v, T &&) { return v; }
+#endif
+
+}  // namespace knf
+
+#define KNF_STATIC_ASSERT(x) static_assert(x, "")
+
+#if KNF_ENABLE_CHECK  // value test, matching the guard around ::knf::Logger
+
+#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
+    defined(__PRETTY_FUNCTION__)
+// for clang and GCC
+#define KNF_FUNC __PRETTY_FUNCTION__
+#else
+// for other compilers
+#define KNF_FUNC __func__
+#endif
+
+#define KNF_CHECK(x)                                                  \
+  (x) ? (void)0                                                       \
+      : ::knf::Voidifier() &                                          \
+            ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
+                << "Check failed: " << #x << " "
+
+// WARNING: x and y may be evaluated multiple times, but this happens only
+// when the check fails. Since the program aborts if it fails, we don't think
+// the extra evaluation of x and y matters.
+//
+// CAUTION: we recommend the following use case:
+//
+//      auto x = Foo();
+//      auto y = Bar();
+//      KNF_CHECK_EQ(x, y) << "Some message";
+//
+//  And please avoid
+//
+//      KNF_CHECK_EQ(Foo(), Bar());
+//
+//  if `Foo()` or `Bar()` causes some side effects, e.g., changing some
+//  local static variables or global variables.
+#define _KNF_CHECK_OP(x, y, op)                                              \
+  ((x)op(y)) ? (void)0                                                       \
+             : ::knf::Voidifier() &                                          \
+                   ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
+                       << "Check failed: " << #x << " " << #op << " " << #y  \
+                       << " (" << (x) << " vs. " << (y) << ") "
+
+#define KNF_CHECK_EQ(x, y) _KNF_CHECK_OP(x, y, ==)
+#define KNF_CHECK_NE(x, y) _KNF_CHECK_OP(x, y, !=)
+#define KNF_CHECK_LT(x, y) _KNF_CHECK_OP(x, y, <)
+#define KNF_CHECK_LE(x, y) _KNF_CHECK_OP(x, y, <=)
+#define KNF_CHECK_GT(x, y) _KNF_CHECK_OP(x, y, >)
+#define KNF_CHECK_GE(x, y) _KNF_CHECK_OP(x, y, >=)
+
+#define KNF_LOG(x) ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::x)
+
+// ------------------------------------------------------------
+//       For debug check
+// ------------------------------------------------------------
+// If you define the macro "-D NDEBUG" while compiling kaldi-native-fbank,
+// the following macros take the `(void)0` branch and do nothing.
+
+#define KNF_DCHECK(x) ::knf::kDisableDebug ? (void)0 : KNF_CHECK(x)
+
+#define KNF_DCHECK_EQ(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_EQ(x, y)
+
+#define KNF_DCHECK_NE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_NE(x, y)
+
+#define KNF_DCHECK_LT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LT(x, y)
+
+#define KNF_DCHECK_LE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LE(x, y)
+
+#define KNF_DCHECK_GT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GT(x, y)
+
+#define KNF_DCHECK_GE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GE(x, y)
+
+#define KNF_DLOG(x) \
+  ::knf::kDisableDebug ? (void)0 : ::knf::Voidifier() & KNF_LOG(x)
+
+#else  // checks disabled: every macro yields a Voidifier; x and y are not evaluated
+
+#define KNF_CHECK(x) ::knf::Voidifier()
+#define KNF_LOG(x) ::knf::Voidifier()
+
+#define KNF_CHECK_EQ(x, y) ::knf::Voidifier()
+#define KNF_CHECK_NE(x, y) ::knf::Voidifier()
+#define KNF_CHECK_LT(x, y) ::knf::Voidifier()
+#define KNF_CHECK_LE(x, y) ::knf::Voidifier()
+#define KNF_CHECK_GT(x, y) ::knf::Voidifier()
+#define KNF_CHECK_GE(x, y) ::knf::Voidifier()
+
+#define KNF_DCHECK(x) ::knf::Voidifier()
+#define KNF_DLOG(x) ::knf::Voidifier()
+#define KNF_DCHECK_EQ(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_NE(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_LT(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_LE(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_GT(x, y) ::knf::Voidifier()
+#define KNF_DCHECK_GE(x, y) ::knf::Voidifier()
+
+#endif  // KNF_ENABLE_CHECK
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_LOG_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc
new file mode 100644
index 0000000..50c857f
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.cc
@@ -0,0 +1,257 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is copied/modified from kaldi/src/feat/mel-computations.cc
+
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+
+#include <algorithm>
+#include <sstream>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+namespace knf {
+
+// Streams the text produced by MelBanksOptions::ToString().
+std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
+  return os << opts.ToString();
+}
+
+float MelBanks::VtlnWarpFreq(float vtln_low_cutoff, float vtln_high_cutoff,
+                             float low_freq, float high_freq,
+                             float vtln_warp_factor, float freq) {
+  // Piecewise-linear VTLN warp F(freq) on [low_freq, high_freq]. It is not
+  // the same as HTK's warp, but takes similar inputs and has the advantage
+  // of never producing empty bins.
+  //
+  // Arguments:
+  //   vtln_low_cutoff, vtln_high_cutoff: lower/upper cutoffs for VTLN; they
+  //       determine the two inflection points l and h below.
+  //   low_freq, high_freq: lower/upper cutoffs of the mel computation.
+  //   vtln_warp_factor: warp factor; the center segment has slope 1/factor.
+  //   freq: the unwarped frequency to map.
+  //
+  // Returns F(freq) when low_freq <= freq <= high_freq, and freq itself
+  // otherwise.
+  //
+  // Properties of F:
+  //   * F(low_freq) == low_freq and F(high_freq) == high_freq.
+  //   * F is continuous and piecewise linear with inflection points l and h
+  //     (both measured in terms of the unwarped frequency).
+  //   * If l <= f <= h, then F(f) = f / vtln_warp_factor.
+  //   * max(h, F(h)) == vtln_high_cutoff; with F(h) == h / vtln_warp_factor
+  //       h = vtln_high_cutoff / max(1, 1 / vtln_warp_factor)
+  //         = vtln_high_cutoff * min(1, vtln_warp_factor).
+  //   * min(l, F(l)) == vtln_low_cutoff, which implies
+  //       l = vtln_low_cutoff / min(1, 1 / vtln_warp_factor)
+  //         = vtln_low_cutoff * max(1, vtln_warp_factor).
+
+  // Out-of-range frequencies are passed through unchanged.
+  if (freq < low_freq || freq > high_freq) return freq;
+
+  KNF_CHECK_GT(vtln_low_cutoff, low_freq);
+  KNF_CHECK_LT(vtln_high_cutoff, high_freq);
+
+  const float unity = 1.0f;
+  const float l = vtln_low_cutoff * std::max(unity, vtln_warp_factor);
+  const float h = vtln_high_cutoff * std::min(unity, vtln_warp_factor);
+
+  // The center segment has slope 1 / vtln_warp_factor.
+  const float center_slope = 1.0f / vtln_warp_factor;
+  const float warped_l = center_slope * l;  // F(l)
+  const float warped_h = center_slope * h;  // F(h)
+  KNF_CHECK(l > low_freq && h < high_freq);
+
+  if (freq < l) {
+    // Left segment, running from (low_freq, low_freq) to (l, F(l)).
+    const float left_slope = (warped_l - low_freq) / (l - low_freq);
+    return low_freq + left_slope * (freq - low_freq);
+  }
+
+  if (freq < h) {
+    // Center segment.
+    return center_slope * freq;
+  }
+
+  // Right segment, running from (h, F(h)) to (high_freq, high_freq).
+  const float right_slope = (high_freq - warped_h) / (high_freq - h);
+  return high_freq + right_slope * (freq - high_freq);
+}
+
+float MelBanks::VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
+                                float low_freq, float high_freq,
+                                float vtln_warp_factor, float mel_freq) {
+  // Mel -> linear frequency, warp there, then map the result back to mel.
+  const float warped = VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
+                                    high_freq, vtln_warp_factor,
+                                    InverseMelScale(mel_freq));
+  return MelScale(warped);
+}
+
+MelBanks::MelBanks(const MelBanksOptions &opts,
+                   const FrameExtractionOptions &frame_opts,
+                   float vtln_warp_factor)
+    : htk_mode_(opts.htk_mode) {  // builds one triangular filter per mel bin
+  int32_t num_bins = opts.num_bins;
+  if (num_bins < 3) KNF_LOG(FATAL) << "Must have at least 3 mel bins";
+  // NOTE: KNF_LOG/KNF_CHECK expand to no-ops unless KNF_ENABLE_CHECK is set.
+  float sample_freq = frame_opts.samp_freq;
+  int32_t window_length_padded = frame_opts.PaddedWindowSize();
+  KNF_CHECK_EQ(window_length_padded % 2, 0);
+
+  int32_t num_fft_bins = window_length_padded / 2;  // FFT bins 0 .. N/2 - 1 are used
+  float nyquist = 0.5f * sample_freq;
+
+  float low_freq = opts.low_freq, high_freq;
+  if (opts.high_freq > 0.0f)
+    high_freq = opts.high_freq;
+  else  // zero or negative: interpreted as an offset from the Nyquist frequency
+    high_freq = nyquist + opts.high_freq;
+
+  if (low_freq < 0.0f || low_freq >= nyquist || high_freq <= 0.0f ||
+      high_freq > nyquist || high_freq <= low_freq) {
+    KNF_LOG(FATAL) << "Bad values in options: low-freq " << low_freq
+                   << " and high-freq " << high_freq << " vs. nyquist "
+                   << nyquist;
+  }
+
+  float fft_bin_width = sample_freq / window_length_padded;
+  // fft-bin width [think of it as Nyquist-freq / half-window-length]
+
+  float mel_low_freq = MelScale(low_freq);
+  float mel_high_freq = MelScale(high_freq);
+
+  debug_ = opts.debug_mel;
+
+  // divide by num_bins+1 in next line because of end-effects where the bins
+  // spread out to the sides.
+  float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);
+
+  float vtln_low = opts.vtln_low, vtln_high = opts.vtln_high;
+  if (vtln_high < 0.0f) {  // negative: offset from the Nyquist frequency
+    vtln_high += nyquist;
+  }
+
+  if (vtln_warp_factor != 1.0f &&  // the VTLN cutoffs only matter when warping
+      (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
+       vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) {
+    KNF_LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
+                   << " and vtln-high " << vtln_high << ", versus "
+                   << "low-freq " << low_freq << " and high-freq " << high_freq;
+  }
+
+  bins_.resize(num_bins);
+  center_freqs_.resize(num_bins);
+
+  for (int32_t bin = 0; bin < num_bins; ++bin) {  // consecutive bins overlap by half
+    float left_mel = mel_low_freq + bin * mel_freq_delta,
+          center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
+          right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;
+
+    if (vtln_warp_factor != 1.0f) {
+      left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+                                 vtln_warp_factor, left_mel);
+      center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+                                   vtln_warp_factor, center_mel);
+      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
+                                  vtln_warp_factor, right_mel);
+    }
+    center_freqs_[bin] = InverseMelScale(center_mel);
+
+    // this_bin will be a vector of coefficients that is only
+    // nonzero where this mel bin is active.
+    std::vector<float> this_bin(num_fft_bins);
+
+    int32_t first_index = -1, last_index = -1;  // range of FFT bins with a weight
+    for (int32_t i = 0; i < num_fft_bins; ++i) {
+      float freq = (fft_bin_width * i);  // Center frequency of this fft
+                                         // bin.
+      float mel = MelScale(freq);
+      if (mel > left_mel && mel < right_mel) {  // strictly inside the triangle
+        float weight;
+        if (mel <= center_mel)
+          weight = (mel - left_mel) / (center_mel - left_mel);
+        else
+          weight = (right_mel - mel) / (right_mel - center_mel);
+        this_bin[i] = weight;
+        if (first_index == -1) first_index = i;
+        last_index = i;
+      }
+    }
+    KNF_CHECK(first_index != -1 && last_index >= first_index &&
+              "You may have set num_mel_bins too large.");
+
+    bins_[bin].first = first_index;  // offset of the first weighted FFT bin
+    int32_t size = last_index + 1 - first_index;
+    bins_[bin].second.insert(bins_[bin].second.end(),
+                             this_bin.begin() + first_index,
+                             this_bin.begin() + first_index + size);
+
+    // Replicate a bug in HTK, for testing purposes.
+    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
+      bins_[bin].second[0] = 0.0;
+    }
+  }  // for (int32_t bin = 0; bin < num_bins; ++bin) {
+
+  if (debug_) {  // dump every filter as one INFO record
+    std::ostringstream os;
+    for (size_t i = 0; i < bins_.size(); i++) {
+      os << "bin " << i << ", offset = " << bins_[i].first << ", vec = ";
+      for (auto k : bins_[i].second) os << k << ", ";
+      os << "\n";
+    }
+    KNF_LOG(INFO) << os.str();
+  }
+}
+
+// "power_spectrum" contains fft energies.
+void MelBanks::Compute(const float *power_spectrum,
+                       float *mel_energies_out) const {
+  // Each output is the weighted sum of the FFT energies covered by one bin.
+  const int32_t num_bins = bins_.size();
+  for (int32_t bin = 0; bin < num_bins; ++bin) {
+    // bins_[bin] holds (index of the first covered FFT bin, weights).
+    const float *spectrum = power_spectrum + bins_[bin].first;
+    const auto &weights = bins_[bin].second;
+
+    float energy = 0;
+    for (int32_t k = 0; k != weights.size(); ++k) {
+      energy += weights[k] * spectrum[k];
+    }
+
+    // HTK-style flooring, kept for testing purposes (dither is preferred).
+    if (htk_mode_ && energy < 1.0) energy = 1.0;
+    mel_energies_out[bin] = energy;
+
+    // Guards against NaN: NaN is the only value that differs from itself.
+    // The check dates back to an OpenBLAS bug and is meant to catch such
+    // problems early.
+    KNF_CHECK_EQ(energy, energy);
+  }
+
+  if (!debug_) return;
+
+  fprintf(stderr, "MEL BANKS:\n");
+  for (int32_t bin = 0; bin < num_bins; ++bin) {
+    fprintf(stderr, " %f", mel_energies_out[bin]);
+  }
+  fprintf(stderr, "\n");
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h
new file mode 100644
index 0000000..c186cc2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/mel-computations.h
@@ -0,0 +1,117 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// This file is copied/modified from kaldi/src/feat/mel-computations.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
+#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
+
+#include <cmath>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+namespace knf {
+
+struct MelBanksOptions {
+  int32_t num_bins = 25;  // number of triangular bins, e.g. 25
+  float low_freq = 20;    // lower frequency cutoff, e.g. 20
+
+  // upper frequency cutoff: 0 means no cutoff; a negative value is
+  // added to the Nyquist frequency to get the cutoff.
+  float high_freq = 0;
+
+  float vtln_low = 100;  // lower cutoff of the vtln warping function.
+
+  // upper cutoff of the vtln warping function: a negative value is
+  // added to the Nyquist frequency to get the cutoff.
+  float vtln_high = -500;
+
+  bool debug_mel = false;
+  // htk_mode is a "hidden" config, it does not show up on command line.
+  // It enables more exact compatibility with HTK, for testing purposes:
+  // it affects mel-energy flooring and reproduces a bug in HTK.
+  bool htk_mode = false;
+
+  std::string ToString() const {
+    std::ostringstream os;
+    os << "num_bins: " << num_bins << "\n"
+       << "low_freq: " << low_freq << "\n"
+       << "high_freq: " << high_freq << "\n"
+       << "vtln_low: " << vtln_low << "\n"
+       << "vtln_high: " << vtln_high << "\n"
+       << "debug_mel: " << debug_mel << "\n"
+       << "htk_mode: " << htk_mode << "\n";
+    return os.str();
+  }
+};
+
+std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);
+
+class MelBanks {
+ public:
+  static inline float InverseMelScale(float mel_freq) {  // undoes MelScale()
+    return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
+  }
+
+  static inline float MelScale(float freq) {  // 1127 * ln(1 + freq / 700)
+    return 1127.0f * logf(1.0f + freq / 700.0f);
+  }
+
+  static float VtlnWarpFreq(
+      float vtln_low_cutoff,
+      float vtln_high_cutoff,  // discontinuities in warp func
+      float low_freq,
+      float high_freq,  // upper+lower frequency cutoffs in
+      // the mel computation
+      float vtln_warp_factor, float freq);
+
+  static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
+                               float low_freq, float high_freq,
+                               float vtln_warp_factor, float mel_freq);
+
+  // TODO(fangjun): Remove vtln_warp_factor
+  MelBanks(const MelBanksOptions &opts,
+           const FrameExtractionOptions &frame_opts, float vtln_warp_factor);
+
+  /// Compute Mel energies (note: not log energies).
+  /// At input, "fft_energies" contains the FFT energies (not log).
+  ///
+  /// @param fft_energies 1-D array of size num_fft_bins/2+1
+  /// @param mel_energies_out  1-D array of size num_mel_bins, i.e. NumBins()
+  void Compute(const float *fft_energies, float *mel_energies_out) const;
+
+  int32_t NumBins() const { return bins_.size(); }  // number of mel bins
+
+ private:
+  // center frequencies of bins, numbered from 0 ... num_bins-1.
+  // NOTE(review): no GetCenterFreqs() accessor is declared in this class.
+  std::vector<float> center_freqs_;
+
+  // the "bins_" vector is a vector, one for each bin, of a pair:
+  // (the first nonzero fft-bin), (the vector of weights).
+  std::vector<std::pair<int32_t, std::vector<float>>> bins_;
+
+  // TODO(fangjun): Remove debug_ and htk_mode_
+  bool debug_;     // when set, Compute() prints the mel energies to stderr
+  bool htk_mode_;  // when set, Compute() floors each mel energy at 1.0
+};
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc
new file mode 100644
index 0000000..833a6f0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.cc
@@ -0,0 +1,165 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.cc
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+RecyclingVector::RecyclingVector(int32_t items_to_hold)
+    : items_to_hold_(items_to_hold == 0 ? -1 : items_to_hold),  // 0: no limit
+      first_available_index_(0) {}
+
+const float *RecyclingVector::At(int32_t index) const {
+  if (index < first_available_index_) {
+    KNF_LOG(FATAL) << "Attempted to retrieve feature vector that was "
+                      "already removed by the RecyclingVector (index = "
+                   << index << "; "
+                   << "first_available_index = " << first_available_index_
+                   << "; "
+                   << "size = " << Size() << ")";
+  }
+  // deque::at() throws std::out_of_range for an index past the last item.
+  return items_.at(index - first_available_index_).data();
+}
+
+void RecyclingVector::PushBack(std::vector<float> item) {
+  // Note: -1 (no limit) becomes a huge value when cast to an unsigned type.
+  if (items_.size() == static_cast<size_t>(items_to_hold_)) {
+    items_.pop_front();
+    ++first_available_index_;
+  }
+  items_.push_back(std::move(item));
+}
+
+int32_t RecyclingVector::Size() const {
+  return first_available_index_ + static_cast<int32_t>(items_.size());
+}
+
+template <class C>
+OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
+    const typename C::Options &opts)
+    : computer_(opts),
+      window_function_(computer_.GetFrameOptions()),
+      features_(opts.frame_opts.max_feature_vectors),
+      input_finished_(false),
+      waveform_offset_(0) {
+  // Require room for more than 200 feature vectors.  Casting to uint32, an
+  // unsigned type, means that -1 (no limit) is treated as `very large`.
+  // NOTE(review): 0 fails this check although RecyclingVector maps 0 to -1.
+  // (Kaldi: search for ONLINE_IVECTOR_LIMIT in online-ivector-feature.cc.)
+  KNF_CHECK(static_cast<uint32_t>(opts.frame_opts.max_feature_vectors) > 200);
+}
+
+template <class C>
+void OnlineGenericBaseFeature<C>::AcceptWaveform(float sampling_rate,
+                                                 const float *waveform,
+                                                 int32_t n) {
+  if (n == 0) {
+    return;  // Nothing to do.
+  }
+
+  if (input_finished_) {
+    KNF_LOG(FATAL) << "AcceptWaveform called after InputFinished() was called.";
+  }
+
+  KNF_CHECK_EQ(sampling_rate, computer_.GetFrameOptions().samp_freq);
+  // Append the n new samples to the buffer, then compute the frames they allow.
+  waveform_remainder_.insert(waveform_remainder_.end(), waveform, waveform + n);
+
+  ComputeFeatures();
+}
+
+template <class C>
+void OnlineGenericBaseFeature<C>::InputFinished() {
+  input_finished_ = true;
+  ComputeFeatures();
+}
+
+template <class C>
+void OnlineGenericBaseFeature<C>::ComputeFeatures() {
+  // Computes a feature vector for every frame the buffered samples now
+  // cover, then drops the samples that no later frame will need.
+  const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions();
+
+  // Samples received so far: those already discarded plus those buffered.
+  const int64_t total_samples = waveform_offset_ + waveform_remainder_.size();
+
+  // Frames in [num_frames_old, num_frames_new) still have to be computed.
+  const int32_t num_frames_old = features_.Size();
+  const int32_t num_frames_new =
+      NumFrames(total_samples, frame_opts, input_finished_);
+
+  KNF_CHECK_GE(num_frames_new, num_frames_old);
+
+  // note: this online feature-extraction code does not support VTLN.
+  const float vtln_warp = 1.0;
+  const bool need_raw_log_energy = computer_.NeedRawLogEnergy();
+
+  std::vector<float> window;
+  for (int32_t frame = num_frames_old; frame < num_frames_new; ++frame) {
+    std::fill(window.begin(), window.end(), 0);
+    float raw_log_energy = 0.0;
+    // Only ask for the raw log energy when the computer needs it.
+    float *energy_out = need_raw_log_energy ? &raw_log_energy : nullptr;
+    ExtractWindow(waveform_offset_, waveform_remainder_, frame, frame_opts,
+                  window_function_, &window, energy_out);
+
+    std::vector<float> feature(computer_.Dim());
+    computer_.Compute(raw_log_energy, vtln_warp, &window, feature.data());
+    features_.PushBack(std::move(feature));
+  }
+
+  // OK, we will now discard any portion of the signal that will not be
+  // necessary to compute frames in the future.
+  const int64_t next_frame_start =
+      FirstSampleOfFrame(num_frames_new, frame_opts);
+  const int32_t samples_to_discard = next_frame_start - waveform_offset_;
+  if (samples_to_discard <= 0) {
+    return;
+  }
+
+  // Number of samples currently held in the buffer.
+  const int32_t num_buffered = static_cast<int32_t>(waveform_remainder_.size());
+  if (samples_to_discard >= num_buffered) {
+    // odd, but we'll try to handle it: the whole buffer is discarded.
+    waveform_offset_ += waveform_remainder_.size();
+    waveform_remainder_.resize(0);
+    return;
+  }
+
+  // Keep only the tail of the buffer: build it in a fresh vector and swap
+  // it in, so the old storage is released when `kept` goes out of scope.
+  std::vector<float> kept(waveform_remainder_.begin() + samples_to_discard,
+                          waveform_remainder_.end());
+  waveform_offset_ += samples_to_discard;
+  waveform_remainder_.swap(kept);
+}
+
+template class OnlineGenericBaseFeature<FbankComputer>;
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h
new file mode 100644
index 0000000..5ca5511
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/online-feature.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The content in this file is copied/modified from
+// kaldi/src/feat/online-feature.h
+#ifndef KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
+
+#include <cstdint>
+#include <deque>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+
+namespace knf {
+
+/// This class serves as a storage for feature vectors with an option to limit
+/// the memory usage by removing old elements. The deleted frames indices are
+/// "remembered" so that regardless of the items_to_hold setting, the user
+/// always provides the indices as if no deletion was being performed.
+/// This is useful when processing very long recordings which would otherwise
+/// cause the memory to eventually blow up when the features are not being
+/// removed.
+class RecyclingVector {
+ public:
+  /// By default (-1; 0 is treated the same) it does not remove any elements.
+  explicit RecyclingVector(int32_t items_to_hold = -1);
+
+  ~RecyclingVector() = default;
+  RecyclingVector(const RecyclingVector &) = delete;
+  RecyclingVector &operator=(const RecyclingVector &) = delete;
+
+  // The pointer is owned by RecyclingVector
+  // Users should not free it
+  const float *At(int32_t index) const;
+
+  void PushBack(std::vector<float> item);  // drops the oldest item when full
+
+  /// This method returns the size as if no "recycling" had happened,
+  /// i.e. equivalent to the number of times the PushBack method has been
+  /// called.
+  int32_t Size() const;
+
+ private:
+  std::deque<std::vector<float>> items_;
+  int32_t items_to_hold_;          // max items kept; -1 means no limit
+  int32_t first_available_index_;  // number of items recycled so far
+};
+
+/// This is a templated class for online feature extraction;
+/// it's templated on a class like MfccComputer or PlpComputer
+/// that does the basic feature extraction.
+template <class C>
+class OnlineGenericBaseFeature {
+ public:
+  // Constructor from options class
+  explicit OnlineGenericBaseFeature(const typename C::Options &opts);
+
+  int32_t Dim() const { return computer_.Dim(); }
+
+  float FrameShiftInSeconds() const {
+    return computer_.GetFrameOptions().frame_shift_ms / 1000.0f;
+  }
+
+  int32_t NumFramesReady() const { return features_.Size(); }
+
+  // Note: IsLastFrame() will only ever return true if you have called
+  // InputFinished() (and this frame is the last frame).
+  bool IsLastFrame(int32_t frame) const {
+    return input_finished_ && frame == NumFramesReady() - 1;
+  }
+
+  const float *GetFrame(int32_t frame) const { return features_.At(frame); }
+
+  // This would be called from the application, when you get
+  // more wave data.  Note: the sampling_rate is only provided so
+  // the code can assert that it matches the sampling rate
+  // expected in the options.
+  //
+  // @param sampling_rate The sampling_rate of the input waveform
+  // @param waveform Pointer to a 1-D array of size n
+  // @param n Number of entries in waveform
+  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+
+  // InputFinished() tells the class you won't be providing any
+  // more waveform.  This will help flush out the last frame or two
+  // of features, in the case where snip-edges == false; it also
+  // affects the return value of IsLastFrame().
+  void InputFinished();
+
+ private:
+  // This function computes any additional feature frames that it is possible to
+  // compute from 'waveform_remainder_', which at this point may contain more
+  // than just a remainder-sized quantity (because AcceptWaveform() appends to
+  // waveform_remainder_ before calling this function).  It adds these feature
+  // frames to features_, and shifts off any now-unneeded samples of input from
+  // waveform_remainder_ while incrementing waveform_offset_ by the same amount.
+  void ComputeFeatures();
+
+  C computer_;  // class that does the MFCC or PLP or filterbank computation
+
+  FeatureWindowFunction window_function_;
+
+  // features_ is the Mfcc or Plp or Fbank features that we have already
+  // computed.
+
+  RecyclingVector features_;
+
+  // True if the user has called "InputFinished()"
+  bool input_finished_;
+
+  // waveform_offset_ is the number of samples of waveform that we have
+  // already discarded, i.e. that were prior to 'waveform_remainder_'.
+  int64_t waveform_offset_;
+
+  // waveform_remainder_ is a short piece of waveform that we may need to keep
+  // after extracting all the whole frames we can (whatever length of feature
+  // will be required for the next phase of computation).
+  // It is a 1-D tensor
+  std::vector<float> waveform_remainder_;
+};
+
+using OnlineFbank = OnlineGenericBaseFeature<FbankComputer>;
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc
new file mode 100644
index 0000000..8cb4b84
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.cc
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/log.h"
+
+// see fftsg.c
+#ifdef __cplusplus
+extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
+#else
+void rdft(int n, int isgn, double *a, int *ip, double *w);
+#endif
+
+namespace knf {
+class Rfft::RfftImpl {
+ public:
+  explicit RfftImpl(int32_t n) : n_(n), ip_(2 + std::sqrt(n / 2)), w_(n / 2) {
+    KNF_CHECK_GE(n, 2);  // the power-of-2 test below also accepts 0 and 1
+    KNF_CHECK_EQ(n & (n - 1), 0);
+  }
+
+  void Compute(float *in_out) {
+    // rdft() works on doubles: widen the input, transform, narrow it back.
+    std::vector<double> d(in_out, in_out + n_);
+    Compute(d.data());
+    std::copy(d.begin(), d.end(), in_out);
+  }
+
+  void Compute(double *in_out) {
+    // 1 means forward fft
+    rdft(n_, 1, in_out, ip_.data(), w_.data());
+  }
+
+ private:
+  int32_t n_;                // number of fft points
+  std::vector<int32_t> ip_;  // work area handed to rdft()
+  std::vector<double> w_;    // work area handed to rdft()
+};
+
+Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}
+
+Rfft::~Rfft() = default;
+
+void Rfft::Compute(float *in_out) { impl_->Compute(in_out); }
+void Rfft::Compute(double *in_out) { impl_->Compute(in_out); }
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h
new file mode 100644
index 0000000..c8cb9f8
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/rfft.h
@@ -0,0 +1,56 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
+#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_
+
+#include <memory>
+
+namespace knf {
+
+// n-point Real discrete Fourier transform
+// where n is a power of 2. n >= 2
+//
+//  R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
+//  I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
+class Rfft {
+ public:
+  // @param n Number of fft bins. it should be a power of 2.
+  explicit Rfft(int32_t n);
+  ~Rfft();
+
+  /** @param in_out A 1-D array of size n.
+   *             On return:
+   *               in_out[0] = R[0]
+   *               in_out[1] = R[n/2]
+   *               for 0 < k < n/2,
+   *                 in_out[2*k] = R[k]
+   *                 in_out[2*k+1] = I[k]
+   *             The input is overwritten with the result.
+   */
+  void Compute(float *in_out);
+  void Compute(double *in_out);
+
+ private:
+  class RfftImpl;  // defined in rfft.cc
+  std::unique_ptr<RfftImpl> impl_;
+};
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_CSRC_RFFT_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc
new file mode 100644
index 0000000..6379633
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-log.cc
@@ -0,0 +1,73 @@
+/**
+ * Copyright      2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/log.h"
+
+namespace knf {
+
+#if KNF_ENABLE_CHECK
+
+TEST(Log, TestLog) {
+  KNF_LOG(TRACE) << "this is a trace message";
+  KNF_LOG(DEBUG) << "this is a debug message";
+  KNF_LOG(INFO) << "this is an info message";
+  KNF_LOG(WARNING) << "this is a warning message";
+  KNF_LOG(ERROR) << "this is an error message";
+
+  ASSERT_THROW(KNF_LOG(FATAL) << "This will crash the program",
+               std::runtime_error);
+
+  // For debug build: the same severities through KNF_DLOG.  FATAL is only
+  // expected to throw when NDEBUG is not defined.
+  KNF_DLOG(TRACE) << "this is a trace message for debug build";
+  KNF_DLOG(DEBUG) << "this is a debug message for debug build";
+  KNF_DLOG(INFO) << "this is an info message for debug build";
+  KNF_DLOG(WARNING) << "this is a warning message for debug build";
+  KNF_DLOG(ERROR) << "this is an error message for debug build";
+
+#if !defined(NDEBUG)
+  ASSERT_THROW(KNF_DLOG(FATAL) << "this is a fatal message for debug build",
+               std::runtime_error);
+#endif
+}
+
+TEST(Log, TestCheck) {
+  KNF_CHECK_EQ(1, 1) << "ok";
+  KNF_CHECK_LE(1, 3) << "ok";
+  KNF_CHECK_LT(1, 2) << "ok";
+  KNF_CHECK_GT(2, 1) << "ok";
+  KNF_CHECK_GE(2, 1) << "ok";
+
+  ASSERT_THROW(KNF_CHECK_EQ(2, 1) << "bad things happened", std::runtime_error);
+
+  // for debug build
+  KNF_DCHECK_EQ(1, 1) << "ok";
+  KNF_DCHECK_LE(1, 3) << "ok";
+  KNF_DCHECK_LT(1, 2) << "ok";
+  KNF_DCHECK_GT(2, 1) << "ok";
+  KNF_DCHECK_GE(2, 1) << "ok";
+  // A failing debug check is expected to throw when NDEBUG is not defined.
+#if !defined(NDEBUG)
+  ASSERT_THROW(KNF_DCHECK_EQ(2, 1) << "bad things happened",
+               std::runtime_error);
+#endif
+}
+
+#endif
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc
new file mode 100644
index 0000000..9f595cf
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-fbank.cc
@@ -0,0 +1,48 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+int main() {
+  knf::FbankOptions opts;
+  opts.frame_opts.dither = 0;
+  opts.mel_opts.num_bins = 10;
+
+  knf::OnlineFbank fbank(opts);
+  for (int32_t i = 0; i < 1600; ++i) {
+    float s = (i * i - i / 2) / 32767.;
+    fbank.AcceptWaveform(16000, &s, 1);  // feed one sample at a time
+  }
+
+  // Print one line per ready frame.  Streaming straight to std::cout avoids
+  // std::ostringstream, for which this file does not include <sstream>.
+  int32_t n = fbank.NumFramesReady();
+  for (int32_t i = 0; i != n; ++i) {
+    const float *frame = fbank.GetFrame(i);
+    for (int32_t k = 0; k != opts.mel_opts.num_bins; ++k) {
+      std::cout << frame[k] << ", ";
+    }
+    std::cout << "\n";
+  }
+
+  std::cout << "\n";  // the output ends with an empty line
+
+  return 0;
+}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc
new file mode 100644
index 0000000..bfbe621
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-online-feature.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
+namespace knf {
+
+TEST(RecyclingVector, TestUnlimited) {
+  RecyclingVector v(-1);
+  constexpr int32_t N = 100;
+  for (int32_t i = 0; i != N; ++i) {
+    const float f = static_cast<float>(i);
+    v.PushBack(std::vector<float>{f, f + 1, f + 2});  // item i = {i, i+1, i+2}
+  }
+  ASSERT_EQ(v.Size(), N);
+  // No limit was set, so every pushed item must still be readable.
+  for (int32_t i = 0; i != N; ++i) {
+    const float *t = v.At(i);
+    for (int32_t k = 0; k != 3; ++k) {
+      EXPECT_EQ(t[k], (i + k));
+    }
+  }
+}
+
+TEST(RecyclingVector, Testlimited) {
+  constexpr int32_t K = 3;
+  constexpr int32_t N = 10;
+  RecyclingVector v(K);
+  for (int32_t i = 0; i != N; ++i) {
+    const float f = static_cast<float>(i);
+    v.PushBack(std::vector<float>{f, f + 1, f + 2});  // item i = {i, i+1, i+2}
+  }
+
+  ASSERT_EQ(v.Size(), N);
+  // Only the last K items are still held; At() is fatal for earlier ones.
+  for (int32_t i = N - K; i != N; ++i) {
+    const float *t = v.At(i);
+
+    for (int32_t k = 0; k != 3; ++k) {
+      EXPECT_EQ(t[k], (i + k));
+    }
+  }
+}
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc
new file mode 100644
index 0000000..47f9904
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/csrc/test-rfft.cc
@@ -0,0 +1,52 @@
+/**
+ * Copyright      2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "kaldi-native-fbank/csrc/rfft.h"
+
+namespace knf {
+
+#if 0
+>>> import torch
+>>> a = torch.tensor([1., -1, 3, 8, 20, 6, 0, 2])
+>>> torch.fft.rfft(a)
+tensor([ 39.0000+0.0000j, -28.1924-2.2929j,  18.0000+5.0000j,  -9.8076+3.7071j,
+          9.0000+0.0000j])
+#endif
+
+TEST(Rfft, TestRfft) {
+  knf::Rfft fft(8);
+  for (int32_t i = 0; i != 10; ++i) {  // repeated runs reuse the same object
+    std::vector<float> d = {1, -1, 3, 8, 20, 6, 0, 2};
+    fft.Compute(d.data());
+
+    EXPECT_EQ(d[0], 39);  // R[0]
+    EXPECT_EQ(d[1], 9);   // R[n/2]
+
+    EXPECT_NEAR(d[2], -28.1924, 1e-3);  // R[1]
+    EXPECT_NEAR(-d[3], -2.2929, 1e-3);  // I[1]; torch's sign is the opposite
+
+    EXPECT_NEAR(d[4], 18, 1e-3);  // R[2]
+    EXPECT_NEAR(-d[5], 5, 1e-3);  // I[2], negated as above
+
+    EXPECT_NEAR(d[6], -9.8076, 1e-3);  // R[3]
+    EXPECT_NEAR(-d[7], 3.7071, 1e-3);  // I[3], negated as above
+  }
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt
new file mode 100644
index 0000000..60d6382
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(csrc)
+add_subdirectory(tests)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt
new file mode 100644
index 0000000..16bee54
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/CMakeLists.txt
@@ -0,0 +1,28 @@
+pybind11_add_module(_kaldi_native_fbank
+  feature-fbank.cc
+  feature-window.cc
+  kaldi-native-fbank.cc
+  mel-computations.cc
+  online-feature.cc
+  utils.cc
+)
+
+if(APPLE)  # add the interpreter's site-packages directory to the rpath
+  execute_process(  # sysconfig replaces distutils, which Python 3.12 removed
+    COMMAND "${PYTHON_EXECUTABLE}" -c "import sysconfig; print(sysconfig.get_paths()['purelib'])"
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR
+  )
+  message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}")
+  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}")
+endif()
+
+if(NOT WIN32)  # kaldi_native_fbank_rpath_origin is expected to be set elsewhere
+  target_link_libraries(_kaldi_native_fbank PRIVATE "-Wl,-rpath,${kaldi_native_fbank_rpath_origin}/kaldi_native_fbank/lib")
+endif()
+
+target_link_libraries(_kaldi_native_fbank PRIVATE kaldi-native-fbank-core)
+
+install(TARGETS _kaldi_native_fbank
+  DESTINATION ../
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc
new file mode 100644
index 0000000..4f32895
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.cc
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
+
+#include <memory>
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindFbankOptions(py::module &m) {  // NOLINT
+  using PyClass = FbankOptions;  // the C++ type exposed as "FbankOptions"
+  py::class_<PyClass>(m, "FbankOptions")
+      .def(py::init<>())  // default-constructed options
+      .def_readwrite("frame_opts", &PyClass::frame_opts)
+      .def_readwrite("mel_opts", &PyClass::mel_opts)
+      .def_readwrite("use_energy", &PyClass::use_energy)
+      .def_readwrite("energy_floor", &PyClass::energy_floor)
+      .def_readwrite("raw_energy", &PyClass::raw_energy)
+      .def_readwrite("htk_compat", &PyClass::htk_compat)
+      .def_readwrite("use_log_fbank", &PyClass::use_log_fbank)
+      .def_readwrite("use_power", &PyClass::use_power)
+      .def("__str__",
+           [](const PyClass &self) -> std::string { return self.ToString(); })
+      .def("as_dict",
+           [](const PyClass &self) -> py::dict { return AsDict(self); })
+      .def_static(
+          "from_dict",
+          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })
+      .def(py::pickle(  // pickling goes through the same dict conversion
+          [](const PyClass &self) -> py::dict { return AsDict(self); },
+          [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }));
+}
+
+void PybindFeatureFbank(py::module &m) {  // NOLINT
+  PybindFbankOptions(m);
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h
new file mode 100644
index 0000000..6490c22
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-fbank.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindFeatureFbank(py::module &m);  // NOLINT
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc
new file mode 100644
index 0000000..1f3b0a5
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.cc
@@ -0,0 +1,67 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/feature-window.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindFrameExtractionOptions(py::module &m) {  // NOLINT
+  using PyClass = FrameExtractionOptions;  // bound under the same name
+  py::class_<PyClass>(m, "FrameExtractionOptions")
+      .def(py::init<>())  // default-constructed options
+      .def_readwrite("samp_freq", &PyClass::samp_freq)
+      .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms)
+      .def_readwrite("frame_length_ms", &PyClass::frame_length_ms)
+      .def_readwrite("dither", &PyClass::dither)
+      .def_readwrite("preemph_coeff", &PyClass::preemph_coeff)
+      .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset)
+      .def_readwrite("window_type", &PyClass::window_type)
+      .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two)
+      .def_readwrite("blackman_coeff", &PyClass::blackman_coeff)
+      .def_readwrite("snip_edges", &PyClass::snip_edges)
+      .def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
+      .def("as_dict",
+           [](const PyClass &self) -> py::dict { return AsDict(self); })
+      .def_static("from_dict",
+                  [](py::dict dict) -> PyClass {
+                    return FrameExtractionOptionsFromDict(dict);
+                  })
+#if 0  // compiled out: these two fields are not exposed to Python
+      .def_readwrite("allow_downsample",
+                     &PyClass::allow_downsample)
+      .def_readwrite("allow_upsample", &PyClass::allow_upsample)
+#endif
+      .def("__str__",
+           [](const PyClass &self) -> std::string { return self.ToString(); })
+      .def(py::pickle(  // pickled state is the dict produced by AsDict()
+          [](const PyClass &self) -> py::dict { return AsDict(self); },
+          [](py::dict dict) -> PyClass {
+            return FrameExtractionOptionsFromDict(dict);
+          }));
+}
+
+void PybindFeatureWindow(py::module &m) {  // NOLINT
+  PybindFrameExtractionOptions(m);  // registers FrameExtractionOptions on `m`
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h
new file mode 100644
index 0000000..aba5b9e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/feature-window.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindFeatureWindow(py::module &m);  // NOLINT
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_FEATURE_WINDOW_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
new file mode 100644
index 0000000..47a2d8e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.cc
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+#include "kaldi-native-fbank/python/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/python/csrc/feature-window.h"
+#include "kaldi-native-fbank/python/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/online-feature.h"
+
+namespace knf {
+
+PYBIND11_MODULE(_kaldi_native_fbank, m) {  // extension module entry point
+  m.doc() = "Python wrapper for kaldi native fbank";
+  PybindFeatureWindow(m);
+  PybindMelComputations(m);
+  PybindFeatureFbank(m);
+  // Registers the OnlineFbank class (see online-feature.cc).
+  PybindOnlineFeature(m);
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
new file mode 100644
index 0000000..756f4ce
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/kaldi-native-fbank.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
+
+#include "pybind11/numpy.h"
+#include "pybind11/pybind11.h"
+#include "pybind11/stl.h"
+namespace py = pybind11;
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_KALDI_NATIVE_FBANK_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc
new file mode 100644
index 0000000..2970b47
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.cc
@@ -0,0 +1,58 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/mel-computations.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+namespace knf {
+
+static void PybindMelBanksOptions(py::module &m) {  // NOLINT
+  using PyClass = MelBanksOptions;  // C++ type bound as "MelBanksOptions"
+  py::class_<PyClass>(m, "MelBanksOptions")
+      .def(py::init<>())  // default-constructed options
+      .def_readwrite("num_bins", &PyClass::num_bins)
+      .def_readwrite("low_freq", &PyClass::low_freq)
+      .def_readwrite("high_freq", &PyClass::high_freq)
+      .def_readwrite("vtln_low", &PyClass::vtln_low)
+      .def_readwrite("vtln_high", &PyClass::vtln_high)
+      .def_readwrite("debug_mel", &PyClass::debug_mel)
+      .def_readwrite("htk_mode", &PyClass::htk_mode)
+      .def("__str__",
+           [](const PyClass &self) -> std::string { return self.ToString(); })
+      .def("as_dict",
+           [](const PyClass &self) -> py::dict { return AsDict(self); })
+      .def_static("from_dict",
+                  [](py::dict dict) -> PyClass {
+                    return MelBanksOptionsFromDict(dict);
+                  })
+      .def(py::pickle(  // pickled state is the dict produced by AsDict()
+          [](const PyClass &self) -> py::dict { return AsDict(self); },
+          [](py::dict dict) -> PyClass {
+            return MelBanksOptionsFromDict(dict);
+          }));
+}
+
+void PybindMelComputations(py::module &m) {  // NOLINT
+  PybindMelBanksOptions(m);  // registers the MelBanksOptions class on `m`
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h
new file mode 100644
index 0000000..2ca9ac7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/mel-computations.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindMelComputations(py::module &m);  // NOLINT
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_MEL_COMPUTATIONS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc
new file mode 100644
index 0000000..46296d1
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.cc
@@ -0,0 +1,68 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/online-feature.h"
+
+#include <string>
+#include <vector>
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+namespace knf {
+
+template <typename C>
+void PybindOnlineFeatureTpl(py::module &m,  // NOLINT
+                            const std::string &class_name,
+                            const std::string &class_help_doc = "") {
+  using PyClass = OnlineGenericBaseFeature<C>;  // class being bound
+  using Options = typename C::Options;          // its constructor argument
+  py::class_<PyClass>(m, class_name.c_str(), class_help_doc.c_str())
+      .def(py::init<const Options &>(), py::arg("opts"))
+      .def_property_readonly("dim", &PyClass::Dim)
+      .def_property_readonly("frame_shift_in_seconds",
+                             &PyClass::FrameShiftInSeconds)
+      .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
+      .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
+      .def(
+          "get_frame",
+          [](py::object obj, int32_t frame) {
+            auto *self = obj.cast<PyClass *>();
+            const float *f = self->GetFrame(frame);
+            return py::array_t<float>({self->Dim()},    // shape
+                                      {sizeof(float)},  // stride in bytes
+                                      f,                // ptr
+                                      obj);  // base: the array keeps `obj`
+                                             // alive while it views `f`
+          },
+          py::arg("frame"))
+      .def(
+          "accept_waveform",
+          [](PyClass &self, float sampling_rate,
+             const std::vector<float> &waveform) {
+            self.AcceptWaveform(sampling_rate, waveform.data(),
+                                waveform.size());
+          },
+          py::arg("sampling_rate"), py::arg("waveform"),
+          py::call_guard<py::gil_scoped_release>())  // GIL released in call
+      .def("input_finished", &PyClass::InputFinished);
+}
+
+void PybindOnlineFeature(py::module &m) {  // NOLINT
+  PybindOnlineFeatureTpl<FbankComputer>(m, "OnlineFbank");  // Python name
+}
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h
new file mode 100644
index 0000000..b4a05df
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/online-feature.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
+
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+namespace knf {
+
+void PybindOnlineFeature(py::module &m);  // NOLINT
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_ONLINE_FEATURE_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc
new file mode 100644
index 0000000..dc9d236
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.cc
@@ -0,0 +1,136 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kaldi-native-fbank/python/csrc/utils.h"
+
+#include <string>
+
+#include "kaldi-native-fbank/csrc/feature-window.h"
+
+#define FROM_DICT(type, key)         \
+  if (dict.contains(#key)) {         \
+    opts.key = py::type(dict[#key]); \
+  }
+// FROM_DICT/AS_DICT expect local variables named `dict` and `opts` in scope.
+#define AS_DICT(key) dict[#key] = opts.key
+
+namespace knf {
+
+FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict) {
+  FrameExtractionOptions opts;
+  // Start from the defaults; only keys present in `dict` are overridden.
+  FROM_DICT(float_, samp_freq);
+  FROM_DICT(float_, frame_shift_ms);
+  FROM_DICT(float_, frame_length_ms);
+  FROM_DICT(float_, dither);
+  FROM_DICT(float_, preemph_coeff);
+  FROM_DICT(bool_, remove_dc_offset);
+  FROM_DICT(str, window_type);
+  FROM_DICT(bool_, round_to_power_of_two);
+  FROM_DICT(float_, blackman_coeff);
+  FROM_DICT(bool_, snip_edges);
+  FROM_DICT(int_, max_feature_vectors);
+
+  return opts;
+}
+
+py::dict AsDict(const FrameExtractionOptions &opts) {
+  py::dict dict;
+  // One entry per field, keyed by the field name.
+  AS_DICT(samp_freq);
+  AS_DICT(frame_shift_ms);
+  AS_DICT(frame_length_ms);
+  AS_DICT(dither);
+  AS_DICT(preemph_coeff);
+  AS_DICT(remove_dc_offset);
+  AS_DICT(window_type);
+  AS_DICT(round_to_power_of_two);
+  AS_DICT(blackman_coeff);
+  AS_DICT(snip_edges);
+  AS_DICT(max_feature_vectors);
+
+  return dict;
+}
+
+MelBanksOptions MelBanksOptionsFromDict(py::dict dict) {
+  MelBanksOptions opts;
+  // Start from the defaults; only keys present in `dict` are overridden.
+  FROM_DICT(int_, num_bins);
+  FROM_DICT(float_, low_freq);
+  FROM_DICT(float_, high_freq);
+  FROM_DICT(float_, vtln_low);
+  FROM_DICT(float_, vtln_high);
+  FROM_DICT(bool_, debug_mel);
+  FROM_DICT(bool_, htk_mode);
+
+  return opts;
+}
+py::dict AsDict(const MelBanksOptions &opts) {
+  py::dict dict;
+  // One entry per field, keyed by the field name.
+  AS_DICT(num_bins);
+  AS_DICT(low_freq);
+  AS_DICT(high_freq);
+  AS_DICT(vtln_low);
+  AS_DICT(vtln_high);
+  AS_DICT(debug_mel);
+  AS_DICT(htk_mode);
+
+  return dict;
+}
+
+FbankOptions FbankOptionsFromDict(py::dict dict) {
+  FbankOptions opts;
+  // Nested dicts go through their own *FromDict helper; absent keys keep defaults.
+  if (dict.contains("frame_opts")) {
+    opts.frame_opts = FrameExtractionOptionsFromDict(dict["frame_opts"]);
+  }
+
+  if (dict.contains("mel_opts")) {
+    opts.mel_opts = MelBanksOptionsFromDict(dict["mel_opts"]);
+  }
+
+  FROM_DICT(bool_, use_energy);
+  FROM_DICT(float_, energy_floor);
+  FROM_DICT(bool_, raw_energy);
+  FROM_DICT(bool_, htk_compat);
+  FROM_DICT(bool_, use_log_fbank);
+  FROM_DICT(bool_, use_power);
+
+  return opts;
+}
+
+py::dict AsDict(const FbankOptions &opts) {
+  py::dict dict;
+  // frame_opts and mel_opts are emitted as nested dicts.
+  dict["frame_opts"] = AsDict(opts.frame_opts);
+  dict["mel_opts"] = AsDict(opts.mel_opts);
+  AS_DICT(use_energy);
+  AS_DICT(energy_floor);
+  AS_DICT(raw_energy);
+  AS_DICT(htk_compat);
+  AS_DICT(use_log_fbank);
+  AS_DICT(use_power);
+
+  return dict;
+}
+
+#undef FROM_DICT
+#undef AS_DICT
+
+}  // namespace knf
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h
new file mode 100644
index 0000000..bb78165
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/csrc/utils.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
+#define KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
+
+#include "kaldi-native-fbank/csrc/feature-fbank.h"
+#include "kaldi-native-fbank/csrc/feature-window.h"
+#include "kaldi-native-fbank/csrc/mel-computations.h"
+#include "kaldi-native-fbank/python/csrc/kaldi-native-fbank.h"
+
+/*
+ * This file contains code about `from_dict` and
+ * `as_dict` for various options in kaldi-native-fbank.
+ *
+ * Regarding `from_dict`, users don't need to provide
+ * all the fields in the options. If some fields
+ * are not provided, it just uses the default one.
+ *
+ * If the provided dict in `from_dict` is empty,
+ * all fields use their default values.
+ */
+
+namespace knf {
+
+FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict);
+py::dict AsDict(const FrameExtractionOptions &opts);
+
+MelBanksOptions MelBanksOptionsFromDict(py::dict dict);
+py::dict AsDict(const MelBanksOptions &opts);
+
+FbankOptions FbankOptionsFromDict(py::dict dict);
+py::dict AsDict(const FbankOptions &opts);
+
+}  // namespace knf
+
+#endif  // KALDI_NATIVE_FBANK_PYTHON_CSRC_UTILS_H_
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
new file mode 100644
index 0000000..598f022
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/kaldi_native_fbank/__init__.py
@@ -0,0 +1,6 @@
+from _kaldi_native_fbank import (
+    FrameExtractionOptions,
+    MelBanksOptions,
+    OnlineFbank,
+    FbankOptions,
+)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt
new file mode 100644
index 0000000..2c02a84
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Register the Python script `source` as a CTest test named "<stem>_py".
+# PYTHONPATH (':'-separated, i.e. POSIX style) is pointed at the parent of this
+# directory and at the directory of the built _kaldi_native_fbank module.
+function(kaldi_native_fbank_add_py_test source)
+  get_filename_component(name "${source}" NAME_WE)
+  set(name "${name}_py")
+  message(STATUS "source: ${source}")
+  add_test(NAME ${name}
+    COMMAND
+      "${PYTHON_EXECUTABLE}"
+      "${CMAKE_CURRENT_SOURCE_DIR}/${source}"
+  )
+
+  get_filename_component(kaldi_native_fbank_path "${CMAKE_CURRENT_LIST_DIR}" DIRECTORY)
+  set_property(TEST ${name}
+    PROPERTY ENVIRONMENT "PYTHONPATH=${kaldi_native_fbank_path}:$<TARGET_FILE_DIR:_kaldi_native_fbank>:$ENV{PYTHONPATH}"
+  )
+endfunction()
+
+# Please keep the files sorted in alphabetical order.
+set(py_test_files
+  test_fbank_options.py
+  test_frame_extraction_options.py
+  test_mel_bank_options.py
+)
+
+if(KALDI_NATIVE_FBANK_BUILD_TESTS)  # register the Python tests only when enabled
+  foreach(source IN LISTS py_test_files)
+    kaldi_native_fbank_add_py_test(${source})
+  endforeach()
+endif()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py
new file mode 100755
index 0000000..d468912
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_fbank_options.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+#
+# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
+
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():  # values of a default-constructed FbankOptions
+    opts = knf.FbankOptions()
+    assert opts.frame_opts.samp_freq == 16000
+    assert opts.frame_opts.frame_shift_ms == 10.0
+    assert opts.frame_opts.frame_length_ms == 25.0
+    assert opts.frame_opts.dither == 1.0
+    assert abs(opts.frame_opts.preemph_coeff - 0.97) < 1e-6
+    assert opts.frame_opts.remove_dc_offset is True
+    assert opts.frame_opts.window_type == "povey"
+    assert opts.frame_opts.round_to_power_of_two is True
+    assert abs(opts.frame_opts.blackman_coeff - 0.42) < 1e-6
+    assert opts.frame_opts.snip_edges is True
+    # Defaults of the nested mel_opts.
+    assert opts.mel_opts.num_bins == 23
+    assert opts.mel_opts.low_freq == 20
+    assert opts.mel_opts.high_freq == 0
+    assert opts.mel_opts.vtln_low == 100
+    assert opts.mel_opts.vtln_high == -500
+    assert opts.mel_opts.debug_mel is False
+    assert opts.mel_opts.htk_mode is False
+    # Defaults of the top-level fbank fields.
+    assert opts.use_energy is False
+    assert opts.energy_floor == 0.0
+    assert opts.raw_energy is True
+    assert opts.htk_compat is False
+    assert opts.use_log_fbank is True
+    assert opts.use_power is True
+
+
+def test_set_get():  # each top-level field reads back the value just written
+    opts = knf.FbankOptions()
+    opts.use_energy = True
+    assert opts.use_energy is True
+
+    opts.energy_floor = 1
+    assert opts.energy_floor == 1
+
+    opts.raw_energy = False
+    assert opts.raw_energy is False
+
+    opts.htk_compat = True
+    assert opts.htk_compat is True
+
+    opts.use_log_fbank = False
+    assert opts.use_log_fbank is False
+
+    opts.use_power = False
+    assert opts.use_power is False
+
+
+def test_set_get_frame_opts():
+    opts = knf.FbankOptions()
+    # Each nested frame_opts field must read back the value just written.
+    opts.frame_opts.samp_freq = 44100
+    assert opts.frame_opts.samp_freq == 44100
+
+    opts.frame_opts.frame_shift_ms = 20.5
+    assert opts.frame_opts.frame_shift_ms == 20.5
+
+    opts.frame_opts.frame_length_ms = 1
+    assert opts.frame_opts.frame_length_ms == 1
+
+    opts.frame_opts.dither = 0.5
+    assert opts.frame_opts.dither == 0.5
+
+    opts.frame_opts.preemph_coeff = 0.25
+    assert opts.frame_opts.preemph_coeff == 0.25
+
+    opts.frame_opts.remove_dc_offset = False
+    assert opts.frame_opts.remove_dc_offset is False
+
+    opts.frame_opts.window_type = "hanning"
+    assert opts.frame_opts.window_type == "hanning"
+
+    opts.frame_opts.round_to_power_of_two = False
+    assert opts.frame_opts.round_to_power_of_two is False
+
+    opts.frame_opts.blackman_coeff = 0.25
+    assert opts.frame_opts.blackman_coeff == 0.25
+
+    opts.frame_opts.snip_edges = False
+    assert opts.frame_opts.snip_edges is False
+
+
+def test_set_get_mel_opts():
+    opts = knf.FbankOptions()
+    # Each nested mel_opts field must read back the value just written.
+    opts.mel_opts.num_bins = 100
+    assert opts.mel_opts.num_bins == 100
+
+    opts.mel_opts.low_freq = 22
+    assert opts.mel_opts.low_freq == 22
+
+    opts.mel_opts.high_freq = 1
+    assert opts.mel_opts.high_freq == 1
+
+    opts.mel_opts.vtln_low = 101
+    assert opts.mel_opts.vtln_low == 101
+
+    opts.mel_opts.vtln_high = -100
+    assert opts.mel_opts.vtln_high == -100
+
+    opts.mel_opts.debug_mel = True
+    assert opts.mel_opts.debug_mel is True
+
+    opts.mel_opts.htk_mode = True
+    assert opts.mel_opts.htk_mode is True
+
+
+def test_from_empty_dict():
+    opts = knf.FbankOptions.from_dict({})
+    opts2 = knf.FbankOptions()
+    # An empty dict must yield the same string form as the defaults.
+    assert str(opts) == str(opts2)
+
+
+def test_from_dict_partial():
+    d = {
+        "energy_floor": 10.5,
+        "htk_compat": True,
+        "mel_opts": {"num_bins": 80, "vtln_low": 1},
+        "frame_opts": {"window_type": "hanning"},
+    }
+    opts = knf.FbankOptions.from_dict(d)
+    assert opts.energy_floor == 10.5
+    assert opts.htk_compat is True
+    assert opts.mel_opts.num_bins == 80
+    assert opts.mel_opts.vtln_low == 1
+    assert opts.frame_opts.window_type == "hanning"
+    # The nested dict on its own must produce the same mel options.
+    mel_opts = knf.MelBanksOptions.from_dict(d["mel_opts"])
+    assert str(opts.mel_opts) == str(mel_opts)
+
+
+def test_from_dict_full_and_as_dict():
+    opts = knf.FbankOptions()
+    opts.htk_compat = True
+    opts.mel_opts.num_bins = 80
+    opts.frame_opts.samp_freq = 10
+
+    d = opts.as_dict()
+    assert d["htk_compat"] is True
+    assert d["mel_opts"]["num_bins"] == 80
+    assert d["frame_opts"]["samp_freq"] == 10
+
+    mel_opts = knf.MelBanksOptions()
+    mel_opts.num_bins = 80
+    assert d["mel_opts"] == mel_opts.as_dict()
+
+    frame_opts = knf.FrameExtractionOptions()
+    frame_opts.samp_freq = 10
+    assert d["frame_opts"] == frame_opts.as_dict()
+    # The as_dict() output must round-trip through from_dict().
+    opts2 = knf.FbankOptions.from_dict(d)
+    assert str(opts2) == str(opts)
+    # Editing the dict must be reflected in the rebuilt options.
+    d["htk_compat"] = False
+    opts3 = knf.FbankOptions.from_dict(d)
+    assert opts3.htk_compat is False
+
+
+def test_pickle():
+    opts = knf.FbankOptions()
+    opts.use_energy = True
+    opts.use_power = False
+
+    opts.frame_opts.samp_freq = 44100
+    opts.mel_opts.num_bins = 100
+    # Round-trip through pickle and compare the string forms.
+    data = pickle.dumps(opts)
+
+    opts2 = pickle.loads(data)
+    assert str(opts) == str(opts2)
+
+
+def main():  # runs every test in this file when executed as a script
+    test_default()
+    test_set_get()
+    test_set_get_frame_opts()
+    test_set_get_mel_opts()
+    test_from_empty_dict()
+    test_from_dict_partial()
+    test_from_dict_full_and_as_dict()
+    test_pickle()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
new file mode 100755
index 0000000..2b16efe
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_frame_extraction_options.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+#
+# Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():
+    """A freshly constructed FrameExtractionOptions carries these defaults."""
+    fresh = knf.FrameExtractionOptions()
+    assert fresh.samp_freq == 16000
+    assert (fresh.frame_shift_ms, fresh.frame_length_ms) == (10.0, 25.0)
+    assert fresh.dither == 1.0
+    assert fresh.window_type == "povey"
+    assert fresh.remove_dc_offset is True
+    assert fresh.round_to_power_of_two is True
+    assert fresh.snip_edges is True
+    assert abs(0.97 - fresh.preemph_coeff) < 1e-6  # approximate match
+    assert abs(0.42 - fresh.blackman_coeff) < 1e-6  # approximate match
+
+
+def test_set_get():
+    """Every field can be assigned and reads back the assigned value.
+
+    Boolean fields are compared by identity, all others by equality.
+    """
+    opts = knf.FrameExtractionOptions()
+
+    # (attribute name, value to assign)
+    assignments = [
+        ("samp_freq", 44100),
+        ("frame_shift_ms", 20.5),
+        ("frame_length_ms", 1),
+        ("dither", 0.5),
+        ("preemph_coeff", 0.25),
+        ("remove_dc_offset", False),
+        ("window_type", "hanning"),
+        ("round_to_power_of_two", False),
+        ("blackman_coeff", 0.25),
+        ("snip_edges", False),
+    ]
+
+    # Assign one field at a time and read it straight back.
+    for attr, new_value in assignments:
+        setattr(opts, attr, new_value)
+        read_back = getattr(opts, attr)
+
+        if isinstance(new_value, bool):
+            # Boolean fields must come back as the bool singletons.
+            assert read_back is new_value
+        else:
+            assert read_back == new_value
+
+
+def test_from_empty_dict():
+    """from_dict({}) must print the same as a default-constructed object."""
+    from_empty = knf.FrameExtractionOptions.from_dict({})
+    defaults = knf.FrameExtractionOptions()
+    assert str(defaults) == str(from_empty)
+
+
+def test_from_dict_partial():
+    """Keys missing from the dict keep their default values."""
+    partial = {"samp_freq": 10, "frame_shift_ms": 2}
+    parsed = str(knf.FrameExtractionOptions.from_dict(partial))
+
+    # Move a default object towards the parsed one, one field at a time.
+    manual = knf.FrameExtractionOptions()
+    assert parsed != str(manual)
+    manual.samp_freq = 10
+    assert parsed != str(manual)
+    manual.frame_shift_ms = 2
+    assert parsed == str(manual)
+
+    # A differing value for a listed key must show up in the comparison.
+    manual.frame_shift_ms = 3
+    assert parsed != str(manual)
+
+
+def test_from_dict_full_and_as_dict():
+    """as_dict() keys mirror attributes and from_dict() round-trips them."""
+    base = knf.FrameExtractionOptions()
+    base.samp_freq = 20
+    base.frame_length_ms = 100
+    exported = base.as_dict()
+
+    # Every exported key names an attribute holding the same value.
+    assert all(exported[k] == getattr(base, k) for k in exported)
+    assert str(knf.FrameExtractionOptions.from_dict(exported)) == str(base)
+
+    # A modified dict entry must carry over into the rebuilt object.
+    exported["window_type"] = "hanning"
+    rebuilt = knf.FrameExtractionOptions.from_dict(exported)
+    assert rebuilt.window_type == "hanning"
+
+
+def test_pickle():
+    """Options must print identically after a pickle round trip."""
+    original = knf.FrameExtractionOptions()
+    original.samp_freq = 44100
+    original.dither = 5.5
+
+    restored = pickle.loads(pickle.dumps(original))
+    assert str(original) == str(restored)
+
+
+def main():
+    """Run every test of this file, in the order listed below."""
+    for run_test in (
+        test_default, test_set_get, test_from_empty_dict,
+        test_from_dict_partial, test_from_dict_full_and_as_dict, test_pickle,
+    ):
+        run_test()
+
+
+if __name__ == "__main__":  # allows running this file directly as a script
+    main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py
new file mode 100755
index 0000000..1135c26
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_mel_bank_options.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+#
+# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)
+
+import pickle
+
+import kaldi_native_fbank as knf
+
+
+def test_default():
+    """A freshly constructed MelBanksOptions carries these defaults."""
+    fresh = knf.MelBanksOptions()
+    assert fresh.num_bins == 25
+    # Frequency limits are checked pairwise as (low, high).
+    assert (fresh.low_freq, fresh.high_freq) == (20, 0)
+    assert (fresh.vtln_low, fresh.vtln_high) == (100, -500)
+    assert fresh.debug_mel is False
+    assert fresh.htk_mode is False
+
+
+def test_set_get():
+    """Every field can be assigned and reads back the assigned value.
+
+    Boolean fields are compared by identity, all others by equality.
+    """
+    opts = knf.MelBanksOptions()
+    assignments = [
+        ("num_bins", 100),
+        ("low_freq", 22),
+        ("high_freq", 1),
+        ("vtln_low", 101),
+        ("vtln_high", -100),
+        ("debug_mel", True),
+        ("htk_mode", True),
+    ]
+
+    for attr, new_value in assignments:
+        setattr(opts, attr, new_value)
+        if isinstance(new_value, bool):
+            assert getattr(opts, attr) is new_value
+        else:
+            assert getattr(opts, attr) == new_value
+
+
+def test_from_empty_dict():
+    """from_dict({}) must print the same as a default-constructed object."""
+    from_empty = knf.MelBanksOptions.from_dict({})
+    defaults = knf.MelBanksOptions()
+    assert str(defaults) == str(from_empty)
+
+
+def test_from_dict_partial():
+    """Keys missing from the dict keep their default values."""
+    partial = {"num_bins": 10, "debug_mel": True}
+    parsed = str(knf.MelBanksOptions.from_dict(partial))
+
+    # Move a default object towards the parsed one, one field at a time.
+    manual = knf.MelBanksOptions()
+    assert parsed != str(manual)
+    manual.num_bins = 10
+    assert parsed != str(manual)
+    manual.debug_mel = True
+    assert parsed == str(manual)
+
+    # A differing value for a listed key must show up in the comparison.
+    manual.debug_mel = False
+    assert parsed != str(manual)
+
+
+def test_from_dict_full_and_as_dict():
+    """as_dict() keys mirror attributes and from_dict() round-trips them."""
+    base = knf.MelBanksOptions()
+    base.num_bins = 80
+    base.vtln_high = 2
+    exported = base.as_dict()
+
+    # Every exported key names an attribute holding the same value.
+    assert all(exported[k] == getattr(base, k) for k in exported)
+    assert str(knf.MelBanksOptions.from_dict(exported)) == str(base)
+
+    # A modified dict entry must carry over into the rebuilt object.
+    exported["htk_mode"] = True
+    rebuilt = knf.MelBanksOptions.from_dict(exported)
+    assert rebuilt.htk_mode is True
+
+
+def test_pickle():
+    """Options must print identically after a pickle round trip."""
+    original = knf.MelBanksOptions()
+    original.num_bins = 100
+    original.low_freq = 22
+
+    restored = pickle.loads(pickle.dumps(original))
+    assert str(original) == str(restored)
+
+
+def main():
+    """Run every test of this file, in the order listed below."""
+    for run_test in (
+        test_default, test_set_get, test_from_empty_dict,
+        test_from_dict_partial, test_from_dict_full_and_as_dict, test_pickle,
+    ):
+        run_test()
+
+
+if __name__ == "__main__":  # allows running this file directly as a script
+    main()
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py
new file mode 100755
index 0000000..12f2c66
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/kaldi-native-fbank/python/tests/test_online_fbank.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+import sys
+
+try:
+    import kaldifeat
+except ImportError:  # skip only when the import fails; other errors surface
+    print("Please install kaldifeat first")
+    sys.exit(0)
+
+import kaldi_native_fbank as knf
+import torch
+
+
+def main():
+    """Check knf.OnlineFbank against kaldifeat.OnlineFbank on random audio."""
+    sampling_rate = 16000
+    samples = torch.randn(16000 * 10)
+
+    # Configure both implementations with the same non-default options.
+    ref_opts = kaldifeat.FbankOptions()
+    knf_opts = knf.FbankOptions()
+    for cfg in (ref_opts, knf_opts):
+        cfg.frame_opts.dither = 0
+        cfg.mel_opts.num_bins = 80
+        cfg.frame_opts.snip_edges = False
+        cfg.mel_opts.debug_mel = False
+
+    reference = kaldifeat.OnlineFbank(ref_opts)
+    reference.accept_waveform(sampling_rate, samples)
+
+    # The knf object is fed a plain Python list instead of a tensor.
+    candidate = knf.OnlineFbank(knf_opts)
+    candidate.accept_waveform(sampling_rate, samples.tolist())
+
+    num_frames = candidate.num_frames_ready
+    assert reference.num_frames_ready == num_frames
+    for i in range(num_frames):
+        expected = reference.get_frame(i)
+        actual = torch.from_numpy(candidate.get_frame(i))
+        close = torch.allclose(expected, actual, atol=1e-3)
+        assert close, (i, (expected - actual).abs().max())
+
+
+if __name__ == "__main__":
+    torch.manual_seed(20220825)  # fixed seed: same random waveform every run
+    main()
+    print("success")
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh
new file mode 100755
index 0000000..2fc150e
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/check_style_cpplint.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Copyright      2020  Mobvoi Inc. (authors: Fangjun Kuang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Usage:
+#
+# (1) To check files of the last commit
+#  ./scripts/check_style_cpplint.sh
+#
+# (2) To check changed files not committed yet
+#  ./scripts/check_style_cpplint.sh 1
+#
+# (3) To check all files in the project
+#  ./scripts/check_style_cpplint.sh 2
+
+
+cpplint_version="1.5.4"
+cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+kaldi_native_fbank_dir=$(cd "$cur_dir/.." && pwd)
+
+build_dir=$kaldi_native_fbank_dir/build
+mkdir -p "$build_dir"
+
+cpplint_src=$build_dir/cpplint-${cpplint_version}/cpplint.py
+# cpplint is downloaded only when its directory is not in the build dir yet.
+if [ ! -d "$build_dir/cpplint-${cpplint_version}" ]; then
+  pushd "$build_dir" || exit 1
+  if command -v wget &> /dev/null; then
+    wget https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
+  elif command -v curl &> /dev/null; then
+    curl -O -SL https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz
+  else
+    echo "Please install wget or curl to download cpplint"
+    exit 1
+  fi
+  tar xf ${cpplint_version}.tar.gz
+  rm ${cpplint_version}.tar.gz
+
+  # cpplint will report the following error for: __host__ __device__ (
+  #
+  #     Extra space before ( in function call  [whitespace/parens] [4]
+  #
+  # the following patch disables the above error
+  sed -i "3490i\        not Search(r'__host__ __device__\\\s+\\\(', fncall) and" "$cpplint_src"
+  popd
+fi
+# utils.sh supplies the ok/error/abort helpers used by the functions below.
+source "$kaldi_native_fbank_dir/scripts/utils.sh"
+
+# return true if the given file is a c++ source file
+# return false otherwise
+function is_source_code_file() {
+  # Prints "true" for *.cc, *.h and *.cu names, "false" for anything else.
+  local verdict=false
+  case "$1" in
+    *.cc | *.h | *.cu) verdict=true ;;
+  esac
+  echo "$verdict"
+}
+
+function check_style() {  # lint the file in $1; a cpplint failure aborts
+  python3 "$cpplint_src" "$1" || abort "$1"
+}
+
+function check_last_commit() {  # prints, on one line, files differing from HEAD^1
+  files=$(git diff --name-only --diff-filter=ACDMRUXB HEAD^1)
+  echo ${files}
+}
+
+function check_current_dir() {
+  # Prints the tracked files that `git status` reports as changed, all on
+  # one line. An entry with four fields is a rename, in which case the
+  # last field is printed; otherwise the second field is printed.
+  files=$(
+    git status -s -uno --porcelain |
+      awk '{ print ((NF == 4) ? $NF : $2) }'
+  )
+
+  echo ${files}
+}
+
+function do_check() {
+  # $1 picks the file set: 1 = changed files, 2 = all files, else last commit.
+  case "$1" in
+    1)
+      echo "Check changed files"
+      files=$(check_current_dir)
+      ;;
+    2)
+      echo "Check all files"
+      files=$(find "$kaldi_native_fbank_dir/kaldi-native-fbank" -name "*.h" -o -name "*.cc" -o -name "*.cu")
+      ;;
+    *)
+      echo "Check last commit"
+      files=$(check_last_commit)
+      ;;
+  esac
+  # $files is expanded unquoted on purpose: it is a whitespace-separated list.
+  for f in $files; do
+    if [[ $(is_source_code_file "$f") == true && -f $f ]]; then
+      check_style "$f"
+    fi
+  done
+}
+
+function main() {
+  # Runs the selected check; ok is reached only if no check_style call aborted.
+  do_check "$1"
+  ok "Great! Style check passed!"
+}
+
+# Work from the project root; stop if it cannot be entered.
+cd "$kaldi_native_fbank_dir" || exit 1
+main "$1"
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh
new file mode 100644
index 0000000..fb424a7
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/scripts/utils.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+default='\033[0m'
+bold='\033[1m'
+red='\033[31m'
+green='\033[32m'
+
+function ok() {  # print a bold green [OK] tag followed by the message in $1
+  printf "${bold}${green}[OK]${default} %s\n" "$1"
+}
+
+function error() {  # print a bold red [FAILED] tag followed by the message in $1
+  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
+}
+
+function abort() {  # print a [FAILED] line for $1, then exit with status 1
+  printf "${bold}${red}[FAILED]${default} %s\n" "$1"
+  exit 1
+}
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py
new file mode 100644
index 0000000..de3d8a2
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/setup.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Copyright (c)  2021  Xiaomi Corporation (author: Fangjun Kuang)
+
+import re
+
+import setuptools
+
+from cmake.cmake_extension import BuildExtension, bdist_wheel, cmake_extension
+
+
+def read_long_description():
+    """Return the text of README.md, decoded as UTF-8."""
+    with open("README.md", encoding="utf8") as readme_file:
+        return readme_file.read()
+
+
+def get_package_version():
+    """Return the version that CMakeLists.txt sets, without its quotes."""
+    with open("CMakeLists.txt", encoding="utf8") as f:
+        match = re.search(r"set\(KALDI_NATIVE_FBANK_VERSION (.*)\)", f.read())
+    if match is None:  # fail with a clear message instead of on None.group
+        raise RuntimeError("no KALDI_NATIVE_FBANK_VERSION in CMakeLists.txt")
+    return match.group(1).strip('"')
+
+
+package_name = "kaldi-native-fbank"
+init_py = "kaldi-native-fbank/python/kaldi_native_fbank/__init__.py"
+
+# Temporarily append the version to the package's __init__.py.
+with open(init_py, "a") as f:
+    f.write(f"__version__ = '{get_package_version()}'\n")
+
+# setup() may raise or exit; the cleanup in `finally` has to run regardless.
+try:
+    setuptools.setup(
+        name=package_name,
+        version=get_package_version(),
+        author="Fangjun Kuang",
+        author_email="csukuangfj@gmail.com",
+        package_dir={"kaldi_native_fbank": "kaldi-native-fbank/python/kaldi_native_fbank"},
+        packages=["kaldi_native_fbank"],
+        url="https://github.com/csukuangfj/kaldi-native-fbank",
+        long_description=read_long_description(),
+        long_description_content_type="text/markdown",
+        ext_modules=[cmake_extension("_kaldi_native_fbank")],
+        cmdclass={"build_ext": BuildExtension, "bdist_wheel": bdist_wheel},
+        zip_safe=False,
+        classifiers=[
+            "Programming Language :: C++",
+            "Programming Language :: Python",
+            "Programming Language :: Python :: 3",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        ],
+        python_requires=">=3.6.0",
+        license="Apache licensed, as found in the LICENSE file",
+    )
+finally:
+    # Drop every line mentioning __version__ again, so that even a failed
+    # build does not leave the source tree modified.
+    with open(init_py, "r") as f:
+        kept = [line for line in f if "__version__" not in line]
+    with open(init_py, "w") as f:
+        f.writelines(kept)
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md
new file mode 100644
index 0000000..41924c0
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/README.md
@@ -0,0 +1,11 @@
+# gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
+
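+Download the toolchain from <https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads/8-3-2019-03>, then unpack it and put its `bin` directory on `PATH` as shown below (the paths are examples; substitute your own install location).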
+
+```bash
+mkdir /ceph-fj/fangjun/software
+cd /ceph-fj/fangjun/software
+tar xvf /path/to/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf.tar.xz
+
+export PATH=/ceph-fj/fangjun/software/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf/bin:$PATH
+```
diff --git a/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake
new file mode 100644
index 0000000..abe1a22
--- /dev/null
+++ b/funasr/runtime/onnxruntime/third_party/kaldi-native-fbank/toolchains/arm-linux-gnueabihf.toolchain.cmake
@@ -0,0 +1,17 @@
+# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/arm-linux-gnueabihf.toolchain.cmake
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR arm)
+# The compilers are named without a directory, so they must be found on PATH.
+set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
+set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
+# find_program() ignores the find root; library and include lookups use only it.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+# ARMv7-A, hard-float ABI, NEON FPU. These assignments replace earlier values.
+set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
+set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon")
+
+# Create the cache entries from the values above; no FORCE is used here.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
diff --git a/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt b/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
index 37d1624..491f41a 100644
--- a/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt
@@ -38,9 +38,9 @@
 ### Project options
 ###
 ## Project stuff
-option(YAML_CPP_BUILD_TESTS "Enable testing" ON)
-option(YAML_CPP_BUILD_TOOLS "Enable parse tools" ON)
-option(YAML_CPP_BUILD_CONTRIB "Enable contrib stuff in library" ON)
+option(YAML_CPP_BUILD_TESTS "Enable testing" OFF)
+option(YAML_CPP_BUILD_TOOLS "Enable parse tools" OFF)
+option(YAML_CPP_BUILD_CONTRIB "Enable contrib stuff in library" OFF)
 
 ## Build options
 # --> General

--
Gitblit v1.9.1