python/FunASR-XL.git

parent: f0fdc051 | 补丁 | 提交 | ignore whitespace

Merge pull request #353 from alibaba-damo-academy/dev_lyb

zhifu gao

2023-04-14 ddfcd68c803c226f4b2a60b0666b29c24d04c259

Merge pull request #353 from alibaba-damo-academy/dev_lyb

support arbitrary sampling rate

9个文件已修改

2个文件已添加

	funasr/runtime/onnxruntime/CMakeLists.txt	19 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/include/Audio.h	17 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/include/libfunasrapi.h	4 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/Audio.cpp	262 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/CMakeLists.txt	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/Vocab.cpp	15 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/libfunasrapi.cpp	16 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/paraformer_onnx.cpp	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/precomp.h	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/resample.cc	305 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/onnxruntime/src/resample.h	137 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 funasr/runtime/onnxruntime/CMakeLists.txt

@@ -2,24 +2,27 @@

project(FunASRonnx)

set(CMAKE_CXX_STANDARD 11)
# set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

include(TestBigEndian)
test_big_endian(BIG_ENDIAN)
if(BIG_ENDIAN)
    message("Big endian system")
else()
    message("Little endian system")
endif()

# for onnxruntime

IF(WIN32)


    if(CMAKE_CL_64)
        link_directories(${ONNXRUNTIME_DIR}\\lib)
    else()
        add_definitions(-D_WIN_X86)
    endif()
ELSE()


link_directories(${ONNXRUNTIME_DIR}/lib)

    link_directories(${ONNXRUNTIME_DIR}/lib)
endif()

add_subdirectory("./third_party/yaml-cpp")

 funasr/runtime/onnxruntime/include/Audio.h

@@ -6,6 +6,13 @@
#include <queue>
#include <stdint.h>

#ifndef model_sample_rate
#define model_sample_rate 16000
#endif
#ifndef WAV_HEADER_SIZE
#define WAV_HEADER_SIZE 44
#endif

using namespace std;

class AudioFrame {
@@ -32,7 +39,6 @@
    int16_t *speech_buff;
    int speech_len;
    int speech_align_len;
    int16_t sample_rate;
    int offset;
    float align_size;
    int data_type;
@@ -43,10 +49,11 @@
    Audio(int data_type, int size);
    ~Audio();
    void disp();
    bool loadwav(const char* filename);
    bool loadwav(const char* buf, int nLen);
    bool loadpcmwav(const char* buf, int nFileLen);
    bool loadpcmwav(const char* filename);
    bool loadwav(const char* filename, int32_t* sampling_rate);
    void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
    bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
    bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
    bool loadpcmwav(const char* filename, int32_t* sampling_rate);
    int fetch_chunck(float *&dout, int len);
    int fetch(float *&dout, int &len, int &flag);
    void padding();

 funasr/runtime/onnxruntime/include/libfunasrapi.h

@@ -55,9 +55,9 @@
// if not give a fnCallback ,it should be NULL 
_FUNASRAPI FUNASR_RESULT    FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

_FUNASRAPI FUNASR_RESULT    FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
_FUNASRAPI FUNASR_RESULT    FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

_FUNASRAPI FUNASR_RESULT    FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
_FUNASRAPI FUNASR_RESULT    FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

_FUNASRAPI FUNASR_RESULT    FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);


 funasr/runtime/onnxruntime/src/Audio.cpp

@@ -3,10 +3,95 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fstream>
#include <assert.h>

#include "Audio.h"
#include "precomp.h"

using namespace std;

// see http://soundfile.sapp.org/doc/WaveFormat/
// Note: We assume little endian here
/// In-memory image of the canonical 44-byte RIFF/WAVE header.
///
/// The members are laid out in file order so the header can be filled with a
/// single raw read of sizeof(WaveHeader) bytes. Multi-byte fields are compared
/// against little-endian constants, so the host is assumed to be little endian.
struct WaveHeader {
  /// Checks that the header describes mono, 16-bit, uncompressed PCM.
  ///
  /// @return true when every checked field has the expected value; otherwise
  ///         prints the offending field to stdout and returns false.
  bool Validate() const {
    //                 F F I R
    if (chunk_id != 0x46464952) {
      printf("Expected chunk_id RIFF. Given: 0x%08x\n",
             static_cast<unsigned int>(chunk_id));
      return false;
    }
    //               E V A W
    if (format != 0x45564157) {
      printf("Expected format WAVE. Given: 0x%08x\n",
             static_cast<unsigned int>(format));
      return false;
    }

    //                     _ t m f  ("fmt " read as a little-endian integer)
    if (subchunk1_id != 0x20746d66) {
      printf("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
             static_cast<unsigned int>(subchunk1_id));
      return false;
    }

    if (subchunk1_size != 16) {  // 16 for PCM
      printf("Expected subchunk1_size 16. Given: %d\n",
             static_cast<int>(subchunk1_size));
      return false;
    }

    if (audio_format != 1) {  // 1 for PCM
      printf("Expected audio_format 1. Given: %d\n", audio_format);
      return false;
    }

    if (num_channels != 1) {  // we support only single channel for now
      printf("Expected single channel. Given: %d\n", num_channels);
      return false;
    }

    const int32_t expected_byte_rate =
        sample_rate * num_channels * bits_per_sample / 8;
    if (byte_rate != expected_byte_rate) {
      printf("Expected byte_rate %d. Given: %d\n",
             static_cast<int>(expected_byte_rate), static_cast<int>(byte_rate));
      return false;
    }

    const int expected_block_align = num_channels * bits_per_sample / 8;
    if (block_align != expected_block_align) {
      printf("Expected block_align %d. Given: %d\n", expected_block_align,
             block_align);
      return false;
    }

    if (bits_per_sample != 16) {  // we support only 16 bits per sample
      printf("Expected bits_per_sample 16. Given: %d\n", bits_per_sample);
      return false;
    }
    return true;
  }

  /// Skips chunks until subchunk2_id is "data" or the stream fails.
  ///
  /// On entry `is` must be positioned right after the header, i.e. at the
  /// payload of the chunk described by subchunk2_id/subchunk2_size. On a
  /// successful return subchunk2_id/subchunk2_size describe the data chunk and
  /// `is` is positioned at its payload.
  ///
  /// See https://en.wikipedia.org/wiki/WAV#Metadata and
  /// https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
  void SeekToDataChunk(std::istream &is) {
    //                              a t a d
    while (is && subchunk2_id != 0x61746164) {
      if (subchunk2_size < 0) {
        // A negative size would seek backwards and could re-read the same
        // bytes forever; treat it as a corrupt stream instead.
        is.setstate(std::ios_base::failbit);
        break;
      }
      // RIFF chunks are word aligned: an odd-sized payload is followed by one
      // pad byte that is not counted in the chunk size.
      const std::streamoff skip =
          static_cast<std::streamoff>(subchunk2_size) + (subchunk2_size & 1);
      is.seekg(skip, std::istream::cur);
      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
    }
  }

  int32_t chunk_id;        // "RIFF"
  int32_t chunk_size;      // not inspected by Validate()
  int32_t format;          // "WAVE"
  int32_t subchunk1_id;    // "fmt "
  int32_t subchunk1_size;  // must be 16 (PCM)
  int16_t audio_format;    // must be 1 (PCM)
  int16_t num_channels;    // must be 1
  int32_t sample_rate;
  int32_t byte_rate;       // sample_rate * num_channels * bits_per_sample / 8
  int16_t block_align;     // num_channels * bits_per_sample / 8
  int16_t bits_per_sample; // must be 16
  int32_t subchunk2_id;    // a tag of this chunk
  int32_t subchunk2_size;  // size of subchunk2
};
static_assert(sizeof(WaveHeader) == WAV_HEADER_SIZE, "");

class AudioWindow {
  private:
@@ -56,7 +141,7 @@
    float frame_length = 400;
    float frame_shift = 160;
    float num_new_samples =
        ceil((num_samples - 400) / frame_shift) * frame_shift + frame_length;
        ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;

    end = start + num_new_samples;
    len = (int)num_new_samples;
@@ -111,62 +196,95 @@

void Audio::disp()
{
    printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
    printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
           speech_len);
}

float Audio::get_time_len()
{
    return (float)speech_len / 16000;
       //speech_len);
    return (float)speech_len / model_sample_rate;
}

bool Audio::loadwav(const char *filename)
void Audio::wavResample(int32_t sampling_rate, const float *waveform,
                          int32_t n)
{
    printf(
          "Creating a resampler:\n"
          "   in_sample_rate: %d\n"
          "   output_sample_rate: %d\n",
          sampling_rate, static_cast<int32_t>(model_sample_rate));
    float min_freq =
        std::min<int32_t>(sampling_rate, model_sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;

    int32_t lowpass_filter_width = 6;
    //FIXME
    //auto resampler = new LinearResample(
    //      sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
    auto resampler = std::make_unique<LinearResample>(
          sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
    std::vector<float> samples;
    resampler->Resample(waveform, n, true, &samples);
    //reset speech_data
    speech_len = samples.size();
    if (speech_data != NULL) {
        free(speech_data);
    }
    speech_data = (float*)malloc(sizeof(float) * speech_len);
    memset(speech_data, 0, sizeof(float) * speech_len);
    copy(samples.begin(), samples.end(), speech_data);
}

bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
{
    WaveHeader header;
    if (speech_data != NULL) {
        free(speech_data);
    }
    if (speech_buff != NULL) {
        free(speech_buff);
    }

    
    offset = 0;

    FILE *fp;
    fp = fopen(filename, "rb");
    if (fp == nullptr)
    std::ifstream is(filename, std::ifstream::binary);
    is.read(reinterpret_cast<char *>(&header), sizeof(header));
    if(!is){
        fprintf(stderr, "Failed to read %s\n", filename);
        return false;
    fseek(fp, 0, SEEK_END);  /*定位到文件末尾*/
    uint32_t nFileLen = ftell(fp);  /*得到文件大小*/
    fseek(fp, 44, SEEK_SET);  /*跳过wav文件头*/

    speech_len = (nFileLen - 44) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
    }
    
    *sampling_rate = header.sample_rate;
    // header.subchunk2_size contains the number of bytes in the data.
    // As we assume each sample contains two bytes, so it is divided by 2 here
    speech_len = header.subchunk2_size / 2;
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);

    if (speech_buff)
    {
        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
        int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
        fclose(fp);
        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
        is.read(reinterpret_cast<char *>(speech_buff), header.subchunk2_size);
        if (!is) {
            fprintf(stderr, "Failed to read %s\n", filename);
            return false;
        }
        speech_data = (float*)malloc(sizeof(float) * speech_len);
        memset(speech_data, 0, sizeof(float) * speech_len);

        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
        memset(speech_data, 0, sizeof(float) * speech_align_len);
        int i;
        float scale = 1;

        if (data_type == 1) {
            scale = 32768;
        }

        for (i = 0; i < speech_len; i++) {
        for (int32_t i = 0; i != speech_len; ++i) {
            speech_data[i] = (float)speech_buff[i] / scale;
        }

        //resample
        if(*sampling_rate != model_sample_rate){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

        AudioFrame* frame = new AudioFrame(speech_len);
        frame_queue.push(frame);


        return true;
    }
@@ -174,57 +292,54 @@
        return false;
}


bool Audio::loadwav(const char* buf, int nFileLen)
bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
{

    

    WaveHeader header;
    if (speech_data != NULL) {
        free(speech_data);
    }
    if (speech_buff != NULL) {
        free(speech_buff);
    }

    offset = 0;

    size_t nOffset = 0;
    std::memcpy(&header, buf, sizeof(header));

#define WAV_HEADER_SIZE 44

    speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
    *sampling_rate = header.sample_rate;
    speech_len = header.subchunk2_size / 2;
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
    if (speech_buff)
    {
        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
        memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));

        speech_data = (float*)malloc(sizeof(float) * speech_len);
        memset(speech_data, 0, sizeof(float) * speech_len);

        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
        memset(speech_data, 0, sizeof(float) * speech_align_len);
        int i;
        float scale = 1;

        if (data_type == 1) {
            scale = 32768;
        }

        for (i = 0; i < speech_len; i++) {
        for (int32_t i = 0; i != speech_len; ++i) {
            speech_data[i] = (float)speech_buff[i] / scale;
        }
        
        //resample
        if(*sampling_rate != model_sample_rate){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

        AudioFrame* frame = new AudioFrame(speech_len);
        frame_queue.push(frame);

        return true;
    }
    else
        return false;

}


bool Audio::loadpcmwav(const char* buf, int nBufLen)
bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
{
    if (speech_data != NULL) {
        free(speech_data);
@@ -234,32 +349,28 @@
    }
    offset = 0;

    size_t nOffset = 0;



    speech_len = nBufLen / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
    if (speech_buff)
    {
        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
        memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));

        speech_data = (float*)malloc(sizeof(float) * speech_len);
        memset(speech_data, 0, sizeof(float) * speech_len);

        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
        memset(speech_data, 0, sizeof(float) * speech_align_len);

     
        int i;
        float scale = 1;

        if (data_type == 1) {
            scale = 32768;
        }

        for (i = 0; i < speech_len; i++) {
        for (int32_t i = 0; i != speech_len; ++i) {
            speech_data[i] = (float)speech_buff[i] / scale;
        }
        
        //resample
        if(*sampling_rate != model_sample_rate){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

        AudioFrame* frame = new AudioFrame(speech_len);
@@ -269,13 +380,10 @@
    }
    else
        return false;

    
}

bool Audio::loadpcmwav(const char* filename)
bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
{

    if (speech_data != NULL) {
        free(speech_data);
    }
@@ -293,34 +401,31 @@
    fseek(fp, 0, SEEK_SET);

    speech_len = (nFileLen) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
    if (speech_buff)
    {
        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
        int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
        fclose(fp);

        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
        memset(speech_data, 0, sizeof(float) * speech_align_len);
        speech_data = (float*)malloc(sizeof(float) * speech_len);
        memset(speech_data, 0, sizeof(float) * speech_len);



        int i;
        float scale = 1;

        if (data_type == 1) {
            scale = 32768;
        }

        for (i = 0; i < speech_len; i++) {
        for (int32_t i = 0; i != speech_len; ++i) {
            speech_data[i] = (float)speech_buff[i] / scale;
        }

        //resample
        if(*sampling_rate != model_sample_rate){
            wavResample(*sampling_rate, speech_data, speech_len);
        }

        AudioFrame* frame = new AudioFrame(speech_len);
        frame_queue.push(frame);

    
        return true;
    }
@@ -328,7 +433,6 @@
        return false;

}


int Audio::fetch_chunck(float *&dout, int len)
{

 funasr/runtime/onnxruntime/src/CMakeLists.txt

@@ -1,5 +1,6 @@

file(GLOB files1 "*.cpp")
file(GLOB files2 "*.cc")
file(GLOB files4 "paraformer/*.cpp")

set(files ${files1} ${files2} ${files3} ${files4})

 funasr/runtime/onnxruntime/src/Vocab.cpp

@@ -13,21 +13,6 @@
{
    ifstream in(filename);
    loadVocabFromYaml(filename);

    /*
    string line;
    if (in) // 有该文件
    {
        while (getline(in, line)) // line中不包括每行的换行符
        {
            vocab.push_back(line);
        }
    }
    else{
        printf("Cannot load vocab from: %s, there must be file vocab.txt", filename);
        exit(-1);
    }
    */
}
Vocab::~Vocab()
{

 funasr/runtime/onnxruntime/src/libfunasrapi.cpp

@@ -17,8 +17,9 @@
        if (!pRecogObj)
            return nullptr;

        int32_t sampling_rate = -1;
        Audio audio(1);
        if (!audio.loadwav(szBuf, nLen))
        if (!audio.loadwav(szBuf, nLen, &sampling_rate))
            return nullptr;
        //audio.split();

@@ -41,14 +42,14 @@
        return pResult;
    }

    _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
    _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
    {
        Model* pRecogObj = (Model*)handle;
        if (!pRecogObj)
            return nullptr;

        Audio audio(1);
        if (!audio.loadpcmwav(szBuf, nLen))
        if (!audio.loadpcmwav(szBuf, nLen, &sampling_rate))
            return nullptr;
        //audio.split();

@@ -71,14 +72,14 @@
        return pResult;
    }

    _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
    _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
    {
        Model* pRecogObj = (Model*)handle;
        if (!pRecogObj)
            return nullptr;

        Audio audio(1);
        if (!audio.loadpcmwav(szFileName))
        if (!audio.loadpcmwav(szFileName, &sampling_rate))
            return nullptr;
        //audio.split();

@@ -106,9 +107,10 @@
        Model* pRecogObj = (Model*)handle;
        if (!pRecogObj)
            return nullptr;

		
        int32_t sampling_rate = -1;
        Audio audio(1);
        if(!audio.loadwav(szWavfile))
        if(!audio.loadwav(szWavfile, &sampling_rate))
            return nullptr;
        //audio.split();


 funasr/runtime/onnxruntime/src/paraformer_onnx.cpp

@@ -70,7 +70,6 @@

void ModelImp::reset()
{
    printf("Not Imp!!!!!!\n");
}

void ModelImp::apply_lfr(Tensor<float>*& din)

 funasr/runtime/onnxruntime/src/precomp.h

@@ -44,6 +44,7 @@
#include "FeatureQueue.h"
#include "SpeechWrap.h"
#include <Audio.h>
#include "resample.h"
#include "Model.h"
#include "paraformer_onnx.h"
#include "libfunasrapi.h"

 funasr/runtime/onnxruntime/src/resample.cc

New file
@@ -0,0 +1,305 @@
/**
 * Copyright     2013  Pegah Ghahremani
 *               2014  IMSL, PKU-HKUST (author: Wei Shi)
 *               2014  Yanqing Sun, Junjie Wang
 *               2014  Johns Hopkins University (author: Daniel Povey)
 * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// this file is copied and modified from
// kaldi/src/feat/resample.cc

#include "resample.h"

#include <assert.h>
#include <math.h>
#include <stdio.h>

#include <cstdlib>
#include <type_traits>

#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif

#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

/// Greatest common divisor of two integers; the result is never negative.
/// Prints an error and terminates the process when both arguments are zero,
/// because every integer divides zero and no greatest divisor exists.
template <class I>
I Gcd(I m, I n) {
  // this function is copied from kaldi/src/base/kaldi-math.h
  static_assert(std::is_integral<I>::value, "");

  if (m == 0 && n == 0) {
    fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
    exit(-1);
  }
  // Exactly one argument is zero: the answer is the magnitude of the other.
  if (m == 0) return n > 0 ? n : -n;
  if (n == 0) return m > 0 ? m : -m;

  // Euclid's algorithm, alternating which operand is reduced.
  for (;;) {
    m %= n;
    if (m == 0) return n > 0 ? n : -n;
    n %= m;
    if (n == 0) return m > 0 ? m : -m;
  }
}

/// Least common multiple of two strictly positive integers.
/// Non-positive input trips the assertion below.
template <class I>
I Lcm(I m, I n) {
  // This function is copied from kaldi/src/base/kaldi-math.h
  assert(m > 0 && n > 0);
  const I divisor = Gcd(m, n);
  const I m_reduced = m / divisor;
  const I n_reduced = n / divisor;
  return divisor * m_reduced * n_reduced;
}

// Inner product of the first n elements of a and b, accumulated in float
// from index 0 upwards.
static float DotProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  int32_t k = 0;
  while (k != n) {
    acc += a[k] * b[k];
    ++k;
  }
  return acc;
}

LinearResample::LinearResample(int32_t samp_rate_in_hz,
                               int32_t samp_rate_out_hz, float filter_cutoff_hz,
                               int32_t num_zeros)
    : samp_rate_in_(samp_rate_in_hz),
      samp_rate_out_(samp_rate_out_hz),
      filter_cutoff_(filter_cutoff_hz),
      num_zeros_(num_zeros) {
  assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
         filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
         filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);

  // base_freq is the frequency of the repeating unit, which is the gcd
  // of the input frequencies.
  int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_);
  input_samples_in_unit_ = samp_rate_in_ / base_freq;
  output_samples_in_unit_ = samp_rate_out_ / base_freq;

  SetIndexesAndWeights();
  Reset();
}

void LinearResample::SetIndexesAndWeights() {
  first_index_.resize(output_samples_in_unit_);
  weights_.resize(output_samples_in_unit_);

  double window_width = num_zeros_ / (2.0 * filter_cutoff_);

  for (int32_t i = 0; i < output_samples_in_unit_; i++) {
    double output_t = i / static_cast<double>(samp_rate_out_);
    double min_t = output_t - window_width, max_t = output_t + window_width;
    // we do ceil on the min and floor on the max, because if we did it
    // the other way around we would unnecessarily include indexes just
    // outside the window, with zero coefficients.  It's possible
    // if the arguments to the ceil and floor expressions are integers
    // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
    // that we unnecessarily include something with a zero coefficient,
    // but this is only a slight efficiency issue.
    int32_t min_input_index = ceil(min_t * samp_rate_in_),
            max_input_index = floor(max_t * samp_rate_in_),
            num_indices = max_input_index - min_input_index + 1;
    first_index_[i] = min_input_index;
    weights_[i].resize(num_indices);
    for (int32_t j = 0; j < num_indices; j++) {
      int32_t input_index = min_input_index + j;
      double input_t = input_index / static_cast<double>(samp_rate_in_),
             delta_t = input_t - output_t;
      // sign of delta_t doesn't matter.
      weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_;
    }
  }
}

/** Here, t is a time in seconds representing an offset from
    the center of the windowed filter function, and FilterFunction(t)
    returns the windowed filter function, described
    in the header as h(t) = f(t)g(t), evaluated at t.
*/
float LinearResample::FilterFunc(float t) const {
  float window,  // raised-cosine (Hanning) window of width
                 // num_zeros_/2*filter_cutoff_
      filter;    // sinc filter function
  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
  else
    window = 0.0;  // outside support of window function
  if (t != 0)
    filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
  else
    filter = 2 * filter_cutoff_;  // limit of the function at t = 0
  return filter * window;
}

void LinearResample::Reset() {
  input_sample_offset_ = 0;
  output_sample_offset_ = 0;
  input_remainder_.resize(0);
}

// Resamples the next `input_dim` samples of a signal.
//
// `input`     points to the new input samples.
// `input_dim` is the number of samples in `input`.
// `flush`     when true, input past the end is treated as zero and the
//             internal state is reset afterwards; when false, the tail of the
//             input and the running offsets are saved for the next call.
// `output`    is resized to the number of samples produced by this call and
//             overwritten with them.
void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
                              std::vector<float> *output) {
  // Totals are counted from the start of the signal, across all calls since
  // the last Reset().
  int64_t tot_input_samp = input_sample_offset_ + input_dim,
          tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);

  assert(tot_output_samp >= output_sample_offset_);

  output->resize(tot_output_samp - output_sample_offset_);

  // samp_out is the index into the total output signal, not just the part
  // of it we are producing here.
  for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
       samp_out++) {
    int64_t first_samp_in;
    int32_t samp_out_wrapped;
    GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
    const std::vector<float> &weights = weights_[samp_out_wrapped];
    // first_input_index is the first index into "input" that we have a weight
    // for.
    int32_t first_input_index =
        static_cast<int32_t>(first_samp_in - input_sample_offset_);
    float this_output;
    // Fast path: the whole filter window lies inside the current input.
    if (first_input_index >= 0 &&
        first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
      this_output =
          DotProduct(input + first_input_index, weights.data(), weights.size());
    } else {  // Handle edge cases.
      this_output = 0.0;
      for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
        float weight = weights[i];
        int32_t input_index = first_input_index + i;
        if (input_index < 0 &&
            static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
          // Before the start of this input: read from the saved tail of the
          // previous input, indexed from its end.
          this_output +=
              weight * input_remainder_[input_remainder_.size() + input_index];
        } else if (input_index >= 0 && input_index < input_dim) {
          this_output += weight * input[input_index];
        } else if (input_index >= input_dim) {
          // We're past the end of the input and are adding zero; should only
          // happen if the user specified flush == true, or else we would not
          // be trying to output this sample.
          assert(flush);
        }
        // Remaining case: earlier than the saved tail reaches; contributes
        // nothing to the sum.
      }
    }
    int32_t output_index =
        static_cast<int32_t>(samp_out - output_sample_offset_);
    (*output)[output_index] = this_output;
  }

  if (flush) {
    Reset();  // Reset the internal state.
  } else {
    // Keep the input tail and advance the running offsets for the next call.
    SetRemainder(input, input_dim);
    input_sample_offset_ = tot_input_samp;
    output_sample_offset_ = tot_output_samp;
  }
}

int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
                                            bool flush) const {
  // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
  // where tick_freq is the least common multiple of samp_rate_in_ and
  // samp_rate_out_.
  int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
  int32_t ticks_per_input_period = tick_freq / samp_rate_in_;

  // work out the number of ticks in the time interval
  // [ 0, input_num_samp/samp_rate_in_ ).
  int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period;
  if (!flush) {
    float window_width = num_zeros_ / (2.0 * filter_cutoff_);
    // To count the window-width in ticks we take the floor.  This
    // is because since we're looking for the largest integer num-out-samp
    // that fits in the interval, which is open on the right, a reduction
    // in interval length of less than a tick will never make a difference.
    // For example, the largest integer in the interval [ 0, 2 ) and the
    // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
    // So when we're subtracting the window-width we can ignore the fractional
    // part.
    int32_t window_width_ticks = floor(window_width * tick_freq);
    // The time-period of the output that we can sample gets reduced
    // by the window-width (which is actually the distance from the
    // center to the edge of the windowing function) if we're not
    // "flushing the output".
    interval_length_in_ticks -= window_width_ticks;
  }
  if (interval_length_in_ticks <= 0) return 0;

  int32_t ticks_per_output_period = tick_freq / samp_rate_out_;
  // Get the last output-sample in the closed interval, i.e. replacing [ ) with
  // [ ].  Note: integer division rounds down.  See
  // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
  // the notation.
  int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period;
  // We need the last output-sample in the open interval, so if it takes us to
  // the end of the interval exactly, subtract one.
  if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
    last_output_samp--;

  // First output-sample index is zero, so the number of output samples
  // is the last output-sample plus one.
  int64_t num_output_samp = last_output_samp + 1;
  return num_output_samp;
}

// inline
void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
                                int32_t *samp_out_wrapped) const {
  // A unit is the smallest nonzero amount of time that is an exact
  // multiple of the input and output sample periods.  The unit index
  // is the answer to "which numbered unit we are in".
  int64_t unit_index = samp_out / output_samples_in_unit_;
  // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
  *samp_out_wrapped =
      static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
  *first_samp_in =
      first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_;
}

void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
  std::vector<float> old_remainder(input_remainder_);
  // max_remainder_needed is the width of the filter from side to side,
  // measured in input samples.  you might think it should be half that,
  // but you have to consider that you might be wanting to output samples
  // that are "in the past" relative to the beginning of the latest
  // input... anyway, storing more remainder than needed is not harmful.
  int32_t max_remainder_needed =
      ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
  input_remainder_.resize(max_remainder_needed);
  for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
       index < 0; index++) {
    // we interpret "index" as an offset from the end of "input" and
    // from the end of input_remainder_.
    int32_t input_index = index + input_dim;
    if (input_index >= 0) {
      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
          input[input_index];
    } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
          old_remainder[input_index +
                        static_cast<int32_t>(old_remainder.size())];
      // else leave it at zero.
    }
  }
}

 funasr/runtime/onnxruntime/src/resample.h

New file
@@ -0,0 +1,137 @@
/**
 * Copyright     2013  Pegah Ghahremani
 *               2014  IMSL, PKU-HKUST (author: Wei Shi)
 *               2014  Yanqing Sun, Junjie Wang
 *               2014  Johns Hopkins University (author: Daniel Povey)
 * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// this file is copied and modified from
// kaldi/src/feat/resample.h

#include <cstdint>
#include <vector>


/*
   We require that the input and output sampling rate be specified as
   integers, as this is an easy way to specify that their ratio be rational.
*/

// Include guard: without it, including this header twice in one translation
// unit redefines LinearResample and fails to compile.
#ifndef FUNASR_RUNTIME_ONNXRUNTIME_SRC_RESAMPLE_H_
#define FUNASR_RUNTIME_ONNXRUNTIME_SRC_RESAMPLE_H_

/// Converts a signal from one integer sampling rate to another.  The signal
/// may be supplied in one call or piece by piece; see Resample().
class LinearResample {
 public:
  /// Constructor.  We make the input and output sample rates integers, because
  /// we are going to need to find a common divisor.  This should just remind
  /// you that they need to be integers.  The filter cutoff needs to be less
  /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2.  num_zeros
  /// controls the sharpness of the filter, more == sharper but less efficient.
  /// We suggest around 4 to 10 for normal use.
  LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
                 float filter_cutoff_hz, int32_t num_zeros);

  /// Resets the state of the object prior to processing a new signal.  It is
  /// only necessary if you have called Resample(x, x_size, false, y) for some
  /// signal, leaving a remainder of that signal stored in the object, and then
  /// abandoned the signal before calling Resample(x, x_size, true, y) for its
  /// last piece.  Calling it unnecessarily between signals does no harm.
  void Reset();

  /// This function does the resampling.  If you call it with flush == true and
  /// you have never called it with flush == false, it just resamples the input
  /// signal (it resizes the output to a suitable number of samples).
  ///
  /// You can also use this function to process a signal a piece at a time.
  /// Suppose you break it into piece1, piece2, ... pieceN.  You can call
  /// \code{.cc}
  /// Resample(piece1, piece1_size, false, &output1);
  /// Resample(piece2, piece2_size, false, &output2);
  /// Resample(piece3, piece3_size, true, &output3);
  /// \endcode
  /// If you call it with flush == false, it won't output the last few samples
  /// but will remember them, so that if you later give it a second piece of
  /// the input signal it can process it correctly.
  /// If your most recent call to the object was with flush == false, it will
  /// have internal state; you can remove this by calling Reset().
  /// Empty input is acceptable.
  void Resample(const float *input, int32_t input_dim, bool flush,
                std::vector<float> *output);

  /// Return the input and output sampling rates (for checks, for example).
  int32_t GetInputSamplingRate() const { return samp_rate_in_; }
  int32_t GetOutputSamplingRate() const { return samp_rate_out_; }

 private:
  /// Fills first_index_ and weights_ for one repeating unit of output samples.
  void SetIndexesAndWeights();

  /// Evaluates the windowed filter function at time offset t (in seconds)
  /// from the centre of the window.
  float FilterFunc(float) const;

  /// This function outputs the number of output samples we will output
  /// for a signal with "input_num_samp" input samples.  If flush == true,
  /// we return the largest n such that
  /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
  /// and note that the interval is half-open.  If flush == false,
  /// define window_width as num_zeros / (2.0 * filter_cutoff_);
  /// we return the largest n such that (n/samp_rate_out_) is in the interval
  /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
  int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;

  /// Given an output-sample index, this function outputs to *first_samp_in the
  /// first input-sample index that we have a weight on (may be negative),
  /// and to *samp_out_wrapped the index into weights_ where we can get the
  /// corresponding weights on the input.
  inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
                         int32_t *samp_out_wrapped) const;

  /// Saves the tail of the signal seen so far into input_remainder_ so that a
  /// later Resample() call can still read it.
  void SetRemainder(const float *input, int32_t input_dim);

 private:
  // The following variables are provided by the user.
  int32_t samp_rate_in_;
  int32_t samp_rate_out_;
  float filter_cutoff_;
  int32_t num_zeros_;

  int32_t input_samples_in_unit_;  ///< The number of input samples in the
                                   ///< smallest repeating unit:
                                   ///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
                                   ///< samp_rate_out_hz)

  int32_t output_samples_in_unit_;  ///< The number of output samples in the
                                    ///< smallest repeating unit:
                                    ///< samp_rate_out_hz /
                                    ///< Gcd(samp_rate_in_hz, samp_rate_out_hz)

  /// The first input-sample index that we sum over, for this output-sample
  /// index.  May be negative; any truncation at the beginning is handled
  /// separately.  This is just for the first few output samples, but we can
  /// extrapolate the correct input-sample index for arbitrary output samples.
  std::vector<int32_t> first_index_;

  /// Weights on the input samples, for this output-sample index.
  std::vector<std::vector<float>> weights_;

  // The following variables keep track of where we are in a particular signal,
  // if it is being provided over multiple calls to Resample().

  int64_t input_sample_offset_;   ///< The number of input samples we have
                                  ///< already received for this signal
                                  ///< (including anything in
                                  ///< input_remainder_)
  int64_t output_sample_offset_;  ///< The number of samples we have already
                                  ///< output for this signal.
  std::vector<float> input_remainder_;  ///< A small trailing part of the
                                        ///< previously seen input signal.
};

#endif  // FUNASR_RUNTIME_ONNXRUNTIME_SRC_RESAMPLE_H_

			@@ -2,24 +2,27 @@

			project(FunASRonnx)

			set(CMAKE_CXX_STANDARD 11)
			# set(CMAKE_CXX_STANDARD 11)
			set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
			set(CMAKE_POSITION_INDEPENDENT_CODE ON)

			include(TestBigEndian)
			test_big_endian(BIG_ENDIAN)
			if(BIG_ENDIAN)
			message("Big endian system")
			else()
			message("Little endian system")
			endif()

			# for onnxruntime

			IF(WIN32)


			if(CMAKE_CL_64)
			link_directories(${ONNXRUNTIME_DIR}\\lib)
			else()
			add_definitions(-D_WIN_X86)
			endif()
			ELSE()


			link_directories(${ONNXRUNTIME_DIR}/lib)

			link_directories(${ONNXRUNTIME_DIR}/lib)
			endif()

			add_subdirectory("./third_party/yaml-cpp")

			@@ -6,6 +6,13 @@
			#include <queue>
			#include <stdint.h>

			#ifndef model_sample_rate
			#define model_sample_rate 16000
			#endif
			#ifndef WAV_HEADER_SIZE
			#define WAV_HEADER_SIZE 44
			#endif

			using namespace std;

			class AudioFrame {
			@@ -32,7 +39,6 @@
			int16_t *speech_buff;
			int speech_len;
			int speech_align_len;
			int16_t sample_rate;
			int offset;
			float align_size;
			int data_type;
			@@ -43,10 +49,11 @@
			Audio(int data_type, int size);
			~Audio();
			void disp();
			bool loadwav(const char* filename);
			bool loadwav(const char* buf, int nLen);
			bool loadpcmwav(const char* buf, int nFileLen);
			bool loadpcmwav(const char* filename);
			bool loadwav(const char* filename, int32_t* sampling_rate);
			void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
			bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
			bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
			bool loadpcmwav(const char* filename, int32_t* sampling_rate);
			int fetch_chunck(float *&dout, int len);
			int fetch(float *&dout, int &len, int &flag);
			void padding();

			@@ -55,9 +55,9 @@
			// if not give a fnCallback ,it should be NULL
			_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

			_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);

			@@ -3,10 +3,95 @@
			#include <stdio.h>
			#include <stdlib.h>
			#include <string.h>
			#include <fstream>
			#include <assert.h>

			#include "Audio.h"
			#include "precomp.h"

			using namespace std;

			// Canonical 44-byte RIFF/WAVE header for PCM data.
			// See http://soundfile.sapp.org/doc/WaveFormat/
			// Note: the numeric tags below assume a little-endian host.
			struct WaveHeader {
			  /// Checks that the header describes 16-bit, single-channel PCM audio.
			  /// Prints the reason to stdout and returns false on the first mismatch.
			  bool Validate() const {
			    //                 F F I R
			    if (chunk_id != 0x46464952) {
			      printf("Expected chunk_id RIFF. Given: 0x%08x\n",
			             static_cast<unsigned int>(chunk_id));
			      return false;
			    }
			    //               E V A W
			    if (format != 0x45564157) {
			      printf("Expected format WAVE. Given: 0x%08x\n",
			             static_cast<unsigned int>(format));
			      return false;
			    }

			    // "fmt " (with trailing space)
			    if (subchunk1_id != 0x20746d66) {
			      printf("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
			             static_cast<unsigned int>(subchunk1_id));
			      return false;
			    }

			    if (subchunk1_size != 16) {  // 16 for PCM
			      printf("Expected subchunk1_size 16. Given: %d\n", subchunk1_size);
			      return false;
			    }

			    if (audio_format != 1) {  // 1 for PCM
			      printf("Expected audio_format 1. Given: %d\n", audio_format);
			      return false;
			    }

			    if (num_channels != 1) {  // we support only single channel for now
			      printf("Expected single channel. Given: %d\n", num_channels);
			      return false;
			    }

			    // Computed in 64 bits: the fields come straight from the file, so the
			    // product could overflow a 32-bit int for a malformed header.
			    const int64_t expected_byte_rate =
			        static_cast<int64_t>(sample_rate) * num_channels * bits_per_sample /
			        8;
			    if (byte_rate != expected_byte_rate) {
			      printf("Expected byte_rate %lld. Given: %d\n",
			             static_cast<long long>(expected_byte_rate), byte_rate);
			      return false;
			    }

			    const int32_t expected_block_align = num_channels * bits_per_sample / 8;
			    if (block_align != expected_block_align) {
			      printf("Expected block_align %d. Given: %d\n", expected_block_align,
			             block_align);
			      return false;
			    }

			    if (bits_per_sample != 16) {  // we support only 16 bits per sample
			      printf("Expected bits_per_sample 16. Given: %d\n", bits_per_sample);
			      return false;
			    }
			    return true;
			  }

			  /// Skips chunks until subchunk2_id is "data" or the stream fails.  On
			  /// return subchunk2_id / subchunk2_size describe the last chunk header
			  /// that was read.
			  ///
			  /// See https://en.wikipedia.org/wiki/WAV#Metadata and
			  /// https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
			  void SeekToDataChunk(std::istream &is) {
			    //                              a t a d
			    while (is && subchunk2_id != 0x61746164) {
			      if (subchunk2_size < 0) {
			        // A negative size would seek backwards and could re-read the same
			        // chunk header forever; treat it as a corrupt stream instead.
			        is.setstate(std::ios_base::failbit);
			        break;
			      }
			      // const char *p = reinterpret_cast<const char *>(&subchunk2_id);
			      // printf("Skip chunk (%x): %c%c%c%c of size: %d\n", subchunk2_id, p[0],
			      //        p[1], p[2], p[3], subchunk2_size);
			      is.seekg(subchunk2_size, std::istream::cur);
			      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
			      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
			    }
			  }

			  // Field order and widths mirror the on-disk layout; do not reorder.
			  int32_t chunk_id;
			  int32_t chunk_size;
			  int32_t format;
			  int32_t subchunk1_id;
			  int32_t subchunk1_size;
			  int16_t audio_format;
			  int16_t num_channels;
			  int32_t sample_rate;
			  int32_t byte_rate;
			  int16_t block_align;
			  int16_t bits_per_sample;
			  int32_t subchunk2_id;    // a tag of this chunk
			  int32_t subchunk2_size;  // size of subchunk2
			};
			static_assert(sizeof(WaveHeader) == WAV_HEADER_SIZE, "");

			class AudioWindow {
			private:
			@@ -56,7 +141,7 @@
			float frame_length = 400;
			float frame_shift = 160;
			float num_new_samples =
			ceil((num_samples - 400) / frame_shift) * frame_shift + frame_length;
			ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;

			end = start + num_new_samples;
			len = (int)num_new_samples;
			@@ -111,62 +196,95 @@

			void Audio::disp()
			{
			printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
			printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
			speech_len);
			}

			float Audio::get_time_len()
			{
			return (float)speech_len / 16000;
			//speech_len);
			return (float)speech_len / model_sample_rate;
			}

			bool Audio::loadwav(const char *filename)
			void Audio::wavResample(int32_t sampling_rate, const float *waveform,
			int32_t n)
			{
			printf(
			"Creating a resampler:\n"
			" in_sample_rate: %d\n"
			" output_sample_rate: %d\n",
			sampling_rate, static_cast<int32_t>(model_sample_rate));
			float min_freq =
			std::min<int32_t>(sampling_rate, model_sample_rate);
			float lowpass_cutoff = 0.99 * 0.5 * min_freq;

			int32_t lowpass_filter_width = 6;
			//FIXME
			//auto resampler = new LinearResample(
			// sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
			auto resampler = std::make_unique<LinearResample>(
			sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
			std::vector<float> samples;
			resampler->Resample(waveform, n, true, &samples);
			//reset speech_data
			speech_len = samples.size();
			if (speech_data != NULL) {
			free(speech_data);
			}
			speech_data = (float)malloc(sizeof(float) speech_len);
			memset(speech_data, 0, sizeof(float) * speech_len);
			copy(samples.begin(), samples.end(), speech_data);
			}

			bool Audio::loadwav(const char filename, int32_t sampling_rate)
			{
			WaveHeader header;
			if (speech_data != NULL) {
			free(speech_data);
			}
			if (speech_buff != NULL) {
			free(speech_buff);
			}


			offset = 0;

			FILE *fp;
			fp = fopen(filename, "rb");
			if (fp == nullptr)
			std::ifstream is(filename, std::ifstream::binary);
			is.read(reinterpret_cast<char *>(&header), sizeof(header));
			if(!is){
			fprintf(stderr, "Failed to read %s\n", filename);
			return false;
			fseek(fp, 0, SEEK_END); /定位到文件末尾/
			uint32_t nFileLen = ftell(fp); /得到文件大小/
			fseek(fp, 44, SEEK_SET); /跳过wav文件头/

			speech_len = (nFileLen - 44) / 2;
			speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
			speech_buff = (int16_t )malloc(sizeof(int16_t) speech_align_len);
			}

			*sampling_rate = header.sample_rate;
			// header.subchunk2_size contains the number of bytes in the data.
			// As we assume each sample contains two bytes, so it is divided by 2 here
			speech_len = header.subchunk2_size / 2;
			speech_buff = (int16_t )malloc(sizeof(int16_t) speech_len);

			if (speech_buff)
			{
			memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
			int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
			fclose(fp);
			memset(speech_buff, 0, sizeof(int16_t) * speech_len);
			is.read(reinterpret_cast<char *>(speech_buff), header.subchunk2_size);
			if (!is) {
			fprintf(stderr, "Failed to read %s\n", filename);
			return false;
			}
			speech_data = (float)malloc(sizeof(float) speech_len);
			memset(speech_data, 0, sizeof(float) * speech_len);

			speech_data = (float)malloc(sizeof(float) speech_align_len);
			memset(speech_data, 0, sizeof(float) * speech_align_len);
			int i;
			float scale = 1;

			if (data_type == 1) {
			scale = 32768;
			}

			for (i = 0; i < speech_len; i++) {
			for (int32_t i = 0; i != speech_len; ++i) {
			speech_data[i] = (float)speech_buff[i] / scale;
			}

			//resample
			if(*sampling_rate != model_sample_rate){
			wavResample(*sampling_rate, speech_data, speech_len);
			}

			AudioFrame* frame = new AudioFrame(speech_len);
			frame_queue.push(frame);


			return true;
			}
			@@ -174,57 +292,54 @@
			return false;
			}


			bool Audio::loadwav(const char* buf, int nFileLen)
			bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
			{



			WaveHeader header;
			if (speech_data != NULL) {
			free(speech_data);
			}
			if (speech_buff != NULL) {
			free(speech_buff);
			}

			offset = 0;

			size_t nOffset = 0;
			std::memcpy(&header, buf, sizeof(header));

			#define WAV_HEADER_SIZE 44

			speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
			speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
			speech_buff = (int16_t)malloc(sizeof(int16_t) speech_align_len);
			*sampling_rate = header.sample_rate;
			speech_len = header.subchunk2_size / 2;
			speech_buff = (int16_t )malloc(sizeof(int16_t) speech_len);
			if (speech_buff)
			{
			memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
			memset(speech_buff, 0, sizeof(int16_t) * speech_len);
			memcpy((void)speech_buff, (const void)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));

			speech_data = (float)malloc(sizeof(float) speech_len);
			memset(speech_data, 0, sizeof(float) * speech_len);

			speech_data = (float)malloc(sizeof(float) speech_align_len);
			memset(speech_data, 0, sizeof(float) * speech_align_len);
			int i;
			float scale = 1;

			if (data_type == 1) {
			scale = 32768;
			}

			for (i = 0; i < speech_len; i++) {
			for (int32_t i = 0; i != speech_len; ++i) {
			speech_data[i] = (float)speech_buff[i] / scale;
			}

			//resample
			if(*sampling_rate != model_sample_rate){
			wavResample(*sampling_rate, speech_data, speech_len);
			}

			AudioFrame* frame = new AudioFrame(speech_len);
			frame_queue.push(frame);

			return true;
			}
			else
			return false;

			}


			bool Audio::loadpcmwav(const char* buf, int nBufLen)
			bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
			{
			if (speech_data != NULL) {
			free(speech_data);
			@@ -234,32 +349,28 @@
			}
			offset = 0;

			size_t nOffset = 0;



			speech_len = nBufLen / 2;
			speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
			speech_buff = (int16_t)malloc(sizeof(int16_t) speech_align_len);
			speech_buff = (int16_t)malloc(sizeof(int16_t) speech_len);
			if (speech_buff)
			{
			memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
			memset(speech_buff, 0, sizeof(int16_t) * speech_len);
			memcpy((void)speech_buff, (const void)buf, speech_len * sizeof(int16_t));

			speech_data = (float)malloc(sizeof(float) speech_len);
			memset(speech_data, 0, sizeof(float) * speech_len);

			speech_data = (float)malloc(sizeof(float) speech_align_len);
			memset(speech_data, 0, sizeof(float) * speech_align_len);


			int i;
			float scale = 1;

			if (data_type == 1) {
			scale = 32768;
			}

			for (i = 0; i < speech_len; i++) {
			for (int32_t i = 0; i != speech_len; ++i) {
			speech_data[i] = (float)speech_buff[i] / scale;
			}

			//resample
			if(*sampling_rate != model_sample_rate){
			wavResample(*sampling_rate, speech_data, speech_len);
			}

			AudioFrame* frame = new AudioFrame(speech_len);
			@@ -269,13 +380,10 @@
			}
			else
			return false;


			}

			bool Audio::loadpcmwav(const char* filename)
			bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
			{

			if (speech_data != NULL) {
			free(speech_data);
			}
			@@ -293,34 +401,31 @@
			fseek(fp, 0, SEEK_SET);

			speech_len = (nFileLen) / 2;
			speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
			speech_buff = (int16_t)malloc(sizeof(int16_t) speech_align_len);
			speech_buff = (int16_t)malloc(sizeof(int16_t) speech_len);
			if (speech_buff)
			{
			memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
			memset(speech_buff, 0, sizeof(int16_t) * speech_len);
			int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
			fclose(fp);

			speech_data = (float)malloc(sizeof(float) speech_align_len);
			memset(speech_data, 0, sizeof(float) * speech_align_len);
			speech_data = (float)malloc(sizeof(float) speech_len);
			memset(speech_data, 0, sizeof(float) * speech_len);



			int i;
			float scale = 1;

			if (data_type == 1) {
			scale = 32768;
			}

			for (i = 0; i < speech_len; i++) {
			for (int32_t i = 0; i != speech_len; ++i) {
			speech_data[i] = (float)speech_buff[i] / scale;
			}

			//resample
			if(*sampling_rate != model_sample_rate){
			wavResample(*sampling_rate, speech_data, speech_len);
			}

			AudioFrame* frame = new AudioFrame(speech_len);
			frame_queue.push(frame);


			return true;
			}
			@@ -328,7 +433,6 @@
			return false;

			}


			int Audio::fetch_chunck(float *&dout, int len)
			{

			@@ -1,5 +1,6 @@

			file(GLOB files1 "*.cpp")
			file(GLOB files2 "*.cc")
			file(GLOB files4 "paraformer/*.cpp")

			set(files ${files1} ${files2} ${files3} ${files4})

			@@ -13,21 +13,6 @@
			{
			ifstream in(filename);
			loadVocabFromYaml(filename);

			/*
			string line;
			if (in) // 有该文件
			{
			while (getline(in, line)) // line中不包括每行的换行符
			{
			vocab.push_back(line);
			}
			}
			else{
			printf("Cannot load vocab from: %s, there must be file vocab.txt", filename);
			exit(-1);
			}
			*/
			}
			Vocab::~Vocab()
			{

			@@ -17,8 +17,9 @@
			if (!pRecogObj)
			return nullptr;

			int32_t sampling_rate = -1;
			Audio audio(1);
			if (!audio.loadwav(szBuf, nLen))
			if (!audio.loadwav(szBuf, nLen, &sampling_rate))
			return nullptr;
			//audio.split();

			@@ -41,14 +42,14 @@
			return pResult;
			}

			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
			{
			Model* pRecogObj = (Model*)handle;
			if (!pRecogObj)
			return nullptr;

			Audio audio(1);
			if (!audio.loadpcmwav(szBuf, nLen))
			if (!audio.loadpcmwav(szBuf, nLen, &sampling_rate))
			return nullptr;
			//audio.split();

			@@ -71,14 +72,14 @@
			return pResult;
			}

			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
			_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
			{
			Model* pRecogObj = (Model*)handle;
			if (!pRecogObj)
			return nullptr;

			Audio audio(1);
			if (!audio.loadpcmwav(szFileName))
			if (!audio.loadpcmwav(szFileName, &sampling_rate))
			return nullptr;
			//audio.split();

			@@ -106,9 +107,10 @@
			Model* pRecogObj = (Model*)handle;
			if (!pRecogObj)
			return nullptr;


			int32_t sampling_rate = -1;
			Audio audio(1);
			if(!audio.loadwav(szWavfile))
			if(!audio.loadwav(szWavfile, &sampling_rate))
			return nullptr;
			//audio.split();

			@@ -70,7 +70,6 @@

			void ModelImp::reset()
			{
			printf("Not Imp!!!!!!\n");
			}

			void ModelImp::apply_lfr(Tensor<float>*& din)

			@@ -44,6 +44,7 @@
			#include "FeatureQueue.h"
			#include "SpeechWrap.h"
			#include <Audio.h>
			#include "resample.h"
			#include "Model.h"
			#include "paraformer_onnx.h"
			#include "libfunasrapi.h"

New file
			@@ -0,0 +1,305 @@
			/**
			* Copyright 2013 Pegah Ghahremani
			* 2014 IMSL, PKU-HKUST (author: Wei Shi)
			* 2014 Yanqing Sun, Junjie Wang
			* 2014 Johns Hopkins University (author: Daniel Povey)
			* Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang)
			*
			* See LICENSE for clarification regarding multiple authors
			*
			* Licensed under the Apache License, Version 2.0 (the "License");
			* you may not use this file except in compliance with the License.
			* You may obtain a copy of the License at
			*
			* http://www.apache.org/licenses/LICENSE-2.0
			*
			* Unless required by applicable law or agreed to in writing, software
			* distributed under the License is distributed on an "AS IS" BASIS,
			* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			* See the License for the specific language governing permissions and
			* limitations under the License.
			*/
			// this file is copied and modified from
			// kaldi/src/feat/resample.cc

			#include "resample.h"

			#include <assert.h>
			#include <math.h>
			#include <stdio.h>

			#include <cstdlib>
			#include <type_traits>

			#ifndef M_2PI
			#define M_2PI 6.283185307179586476925286766559005
			#endif

			#ifndef M_PI
			#define M_PI 3.1415926535897932384626433832795
			#endif

			/// Returns the greatest common divisor of m and n as a non-negative value.
			/// If exactly one argument is zero, the absolute value of the other one is
			/// returned.  If both are zero the GCD is undefined: an error is printed to
			/// stderr and the process exits with status -1.
			///
			/// This function is copied from kaldi/src/base/kaldi-math.h
			template <class I>
			I Gcd(I m, I n) {
			  static_assert(std::is_integral<I>::value, "");
			  if (m == 0 || n == 0) {
			    if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
			      fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
			      exit(-1);
			    }
			    // return absolute value of whichever is nonzero
			    return (m == 0 ? (n > 0 ? n : -n) : (m > 0 ? m : -m));
			  }
			  // Euclid's algorithm, alternating which operand is reduced.
			  while (1) {
			    m %= n;
			    if (m == 0) return (n > 0 ? n : -n);
			    n %= m;
			    if (n == 0) return (m > 0 ? m : -m);
			  }
			}

			/// Least common multiple of two strictly positive integers.  The
			/// precondition is enforced with assert().
			///
			/// This function is copied from kaldi/src/base/kaldi-math.h
			template <class I>
			I Lcm(I m, I n) {
			  assert(m > 0 && n > 0);
			  const I common = Gcd(m, n);
			  const I m_reduced = m / common;
			  const I n_reduced = n / common;
			  // Same evaluation order as (common * m_reduced) * n_reduced.
			  return common * m_reduced * n_reduced;
			}

			/// Returns the sum of a[i] * b[i] for i in [0, n), accumulated in single
			/// precision in index order.  Returns 0 when n <= 0.
			static float DotProduct(const float *a, const float *b, int32_t n) {
			  float sum = 0;
			  for (int32_t i = 0; i < n; ++i) {
			    sum += a[i] * b[i];
			  }
			  return sum;
			}

			/// Stores the user-supplied parameters, derives the size of the repeating
			/// unit for both rates, clears the streaming state and precomputes the
			/// filter weights.
			LinearResample::LinearResample(int32_t samp_rate_in_hz,
			                               int32_t samp_rate_out_hz, float filter_cutoff_hz,
			                               int32_t num_zeros)
			    : samp_rate_in_(samp_rate_in_hz),
			      samp_rate_out_(samp_rate_out_hz),
			      filter_cutoff_(filter_cutoff_hz),
			      num_zeros_(num_zeros) {
			  assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
			         filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
			         filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);

			  // The repeating unit has frequency gcd(in, out); count how many samples of
			  // each rate fit into one such unit.
			  const int32_t common_divisor = Gcd(samp_rate_in_, samp_rate_out_);
			  output_samples_in_unit_ = samp_rate_out_ / common_divisor;
			  input_samples_in_unit_ = samp_rate_in_ / common_divisor;

			  // Reset() only touches the streaming state, so its position relative to
			  // the weight computation does not matter.
			  Reset();
			  SetIndexesAndWeights();
			}

			/// For every output sample inside one repeating unit, records the first
			/// input index covered by the filter window (first_index_) and the filter
			/// weight of each covered input sample (weights_).
			void LinearResample::SetIndexesAndWeights() {
			  first_index_.resize(output_samples_in_unit_);
			  weights_.resize(output_samples_in_unit_);

			  // Distance (in seconds) from the window centre to its edge.
			  const double half_width = num_zeros_ / (2.0 * filter_cutoff_);

			  for (int32_t out = 0; out < output_samples_in_unit_; ++out) {
			    const double t_out = out / static_cast<double>(samp_rate_out_);
			    const double t_lo = t_out - half_width;
			    const double t_hi = t_out + half_width;
			    // ceil on the lower edge and floor on the upper edge keep only input
			    // samples that lie inside the window; rounding the other way would add
			    // samples just outside it whose coefficient is zero.
			    const int32_t first_in = ceil(t_lo * samp_rate_in_);
			    const int32_t last_in = floor(t_hi * samp_rate_in_);
			    const int32_t count = last_in - first_in + 1;

			    first_index_[out] = first_in;
			    std::vector<float> &row = weights_[out];
			    row.resize(count);
			    for (int32_t k = 0; k < count; ++k) {
			      const double t_in = (first_in + k) / static_cast<double>(samp_rate_in_);
			      // The sign of the time difference doesn't matter.
			      row[k] = FilterFunc(t_in - t_out) / samp_rate_in_;
			    }
			  }
			}

			/** Evaluates the windowed low-pass filter at time offset t (in seconds)
			    from the centre of the window: a sinc function with cutoff
			    filter_cutoff_, multiplied by a raised-cosine (Hanning) window that is
			    zero for |t| >= num_zeros_ / (2 * filter_cutoff_).
			*/
			float LinearResample::FilterFunc(float t) const {
			  const bool inside_window = fabs(t) < num_zeros_ / (2.0 * filter_cutoff_);
			  // Raised-cosine window; zero outside its support.
			  const float window =
			      inside_window
			          ? 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t))
			          : 0.0;
			  // Sinc filter; at t == 0 use the limit of the expression.
			  const float filter = (t != 0)
			                           ? sin(M_2PI * filter_cutoff_ * t) / (M_PI * t)
			                           : 2 * filter_cutoff_;
			  return filter * window;
			}

			/// Forgets everything about the signal processed so far: both running
			/// sample counters go back to zero and the stored input tail is dropped.
			void LinearResample::Reset() {
			  input_remainder_.clear();
			  output_sample_offset_ = 0;
			  input_sample_offset_ = 0;
			}

			/// Resamples one piece of the input signal.
			///
			/// @param input      Samples of the current piece.
			/// @param input_dim  Number of samples in `input`.
			/// @param flush      true if this is the last piece of the signal: samples
			///                   past the end of the input are treated as zero and the
			///                   internal state is reset afterwards.  false keeps the
			///                   tail of the input for the next call.
			/// @param output     Resized to the number of output samples produced by
			///                   this call and filled with them.
			void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
			std::vector<float> *output) {
			// Totals are counted from the start of the signal, not of this piece.
			int64_t tot_input_samp = input_sample_offset_ + input_dim,
			tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);

			assert(tot_output_samp >= output_sample_offset_);

			// Only the samples not yet emitted by earlier calls are produced here.
			output->resize(tot_output_samp - output_sample_offset_);

			// samp_out is the index into the total output signal, not just the part
			// of it we are producing here.
			for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
			samp_out++) {
			int64_t first_samp_in;
			int32_t samp_out_wrapped;
			GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
			const std::vector<float> &weights = weights_[samp_out_wrapped];
			// first_input_index is the first index into "input" that we have a weight
			// for.  It is negative when the window starts before this piece.
			int32_t first_input_index =
			static_cast<int32_t>(first_samp_in - input_sample_offset_);
			float this_output;
			// Fast path: the whole filter window lies inside "input".
			if (first_input_index >= 0 &&
			first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
			this_output =
			DotProduct(input + first_input_index, weights.data(), weights.size());
			} else { // Handle edge cases.
			this_output = 0.0;
			for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
			float weight = weights[i];
			int32_t input_index = first_input_index + i;
			// Before the start of this piece: read from the stored remainder if it
			// reaches back far enough; otherwise the term is skipped (adds zero).
			if (input_index < 0 &&
			static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
			this_output +=
			weight * input_remainder_[input_remainder_.size() + input_index];
			} else if (input_index >= 0 && input_index < input_dim) {
			this_output += weight * input[input_index];
			} else if (input_index >= input_dim) {
			// We're past the end of the input and are adding zero; should only
			// happen if the user specified flush == true, or else we would not
			// be trying to output this sample.
			assert(flush);
			}
			}
			}
			int32_t output_index =
			static_cast<int32_t>(samp_out - output_sample_offset_);
			(*output)[output_index] = this_output;
			}

			if (flush) {
			Reset(); // Reset the internal state.
			} else {
			// Keep the tail of the signal and advance both running counters so the
			// next call continues where this one stopped.
			SetRemainder(input, input_dim);
			input_sample_offset_ = tot_input_samp;
			output_sample_offset_ = tot_output_samp;
			}
			}

			/// Computes how many output samples fit in the time span covered by
			/// `input_num_samp` input samples.
			///
			/// With flush == true the span is [0, input_num_samp / samp_rate_in_).
			/// With flush == false the span is shortened by
			/// num_zeros_ / (2 * filter_cutoff_) seconds, i.e. the distance from the
			/// centre of the filter window to its edge, because those trailing
			/// outputs cannot be produced until more input is available.
			///
			/// @param input_num_samp  Number of input samples.
			/// @param flush           Whether the tail of the signal is being flushed.
			/// @return Count of output-sample times lying in the half-open span;
			///         0 when the span is empty.
			int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
			                                            bool flush) const {
			  // Work in integer "ticks" of 1 / lcm(rate_in, rate_out) seconds so the
			  // arithmetic on sample times stays exact.
			  const int32_t ticks_per_second = Lcm(samp_rate_in_, samp_rate_out_);
			  const int32_t input_period_ticks = ticks_per_second / samp_rate_in_;

			  int64_t span_ticks = input_num_samp * input_period_ticks;
			  if (!flush) {
			    const float half_window_sec = num_zeros_ / (2.0 * filter_cutoff_);
			    // Rounding the window down to whole ticks is safe: the span is open
			    // on the right, so trimming it by a fraction of a tick can never
			    // change which integer sample indexes fall inside it.
			    const int32_t half_window_ticks =
			        floor(half_window_sec * ticks_per_second);
			    span_ticks -= half_window_ticks;
			  }
			  if (span_ticks <= 0) return 0;

			  const int32_t output_period_ticks = ticks_per_second / samp_rate_out_;
			  // Index of the last output sample in the *closed* span [0, span_ticks]
			  // (integer division rounds down).
			  const int64_t last_in_closed_span = span_ticks / output_period_ticks;
			  // If that sample sits exactly on the right edge it is outside the
			  // half-open span, so it must not be counted.
			  const bool lands_on_edge = (span_ticks % output_period_ticks == 0);

			  // Sample indexes start at zero, so the count is "last index + 1"; when
			  // the last index was excluded the "- 1" and "+ 1" cancel.
			  return lands_on_edge ? last_in_closed_span : last_in_closed_span + 1;
			}

			// inline
			void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
			int32_t *samp_out_wrapped) const {
			// A unit is the smallest nonzero amount of time that is an exact
			// multiple of the input and output sample periods. The unit index
			// is the answer to "which numbered unit we are in".
			int64_t unit_index = samp_out / output_samples_in_unit_;
			// samp_out_wrapped is equal to samp_out % output_samples_in_unit_
			*samp_out_wrapped =
			static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
			*first_samp_in =
			first_index_[samp_out_wrapped] + unit_index input_samples_in_unit_;
			}

			void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
			std::vector<float> old_remainder(input_remainder_);
			// max_remainder_needed is the width of the filter from side to side,
			// measured in input samples. you might think it should be half that,
			// but you have to consider that you might be wanting to output samples
			// that are "in the past" relative to the beginning of the latest
			// input... anyway, storing more remainder than needed is not harmful.
			int32_t max_remainder_needed =
			ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
			input_remainder_.resize(max_remainder_needed);
			for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
			index < 0; index++) {
			// we interpret "index" as an offset from the end of "input" and
			// from the end of input_remainder_.
			int32_t input_index = index + input_dim;
			if (input_index >= 0) {
			input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
			input[input_index];
			} else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
			input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
			old_remainder[input_index +
			static_cast<int32_t>(old_remainder.size())];
			// else leave it at zero.
			}
			}
			}

New file
			@@ -0,0 +1,137 @@
			/**
			* Copyright 2013 Pegah Ghahremani
			* 2014 IMSL, PKU-HKUST (author: Wei Shi)
			* 2014 Yanqing Sun, Junjie Wang
			* 2014 Johns Hopkins University (author: Daniel Povey)
			* Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang)
			*
			* See LICENSE for clarification regarding multiple authors
			*
			* Licensed under the Apache License, Version 2.0 (the "License");
			* you may not use this file except in compliance with the License.
			* You may obtain a copy of the License at
			*
			* http://www.apache.org/licenses/LICENSE-2.0
			*
			* Unless required by applicable law or agreed to in writing, software
			* distributed under the License is distributed on an "AS IS" BASIS,
			* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			* See the License for the specific language governing permissions and
			* limitations under the License.
			*/
			// this file is copied and modified from
			// kaldi/src/feat/resample.h

			#include <cstdint>
			#include <vector>


			/*
			We require that the input and output sampling rate be specified as
			integers, as this is an easy way to specify that their ratio be rational.
			*/

			/// Resamples a signal from one integer sampling rate to another. The
			/// signal may be supplied in one call or piece by piece; between pieces
			/// the object keeps a short tail of the input plus running sample
			/// counters.
			class LinearResample {
			 public:
			  /// Builds a resampler.
			  ///
			  /// Both rates are integers because a common divisor of the two has to
			  /// be found.
			  ///
			  /// @param samp_rate_in_hz   Input sampling rate.
			  /// @param samp_rate_out_hz  Output sampling rate.
			  /// @param filter_cutoff_hz  Filter cutoff; must be below both
			  ///                          samp_rate_in_hz / 2 and samp_rate_out_hz / 2.
			  /// @param num_zeros         Sharpness of the filter: larger is sharper
			  ///                          but less efficient. Around 4 to 10 is
			  ///                          suggested for normal use.
			  LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
			                 float filter_cutoff_hz, int32_t num_zeros);

			  /// Clears the per-signal state before a new signal is processed.
			  ///
			  /// Only required when Resample(x, x_size, false, y) was called (so a
			  /// remainder of the signal is being held) and the signal was then
			  /// abandoned without a final Resample(x, x_size, true, y) call. Calling
			  /// it needlessly between signals does no harm.
			  void Reset();

			  /// Performs the resampling.
			  ///
			  /// Called with flush == true on an object that has never been called
			  /// with flush == false, it simply resamples the input signal, resizing
			  /// the output to a suitable number of samples.
			  ///
			  /// A signal can also be processed one piece at a time. Given pieces
			  /// piece1, piece2, ... pieceN, call
			  /// \code{.cc}
			  /// Resample(piece1, piece1_size, false, &output1);
			  /// Resample(piece2, piece2_size, false, &output2);
			  /// Resample(piece3, piece3_size, true, &output3);
			  /// \endcode
			  /// With flush == false the last few samples are not output but are
			  /// remembered, so that a following piece of the input can be processed
			  /// correctly. After such a call the object holds internal state, which
			  /// Reset() removes. Empty input is acceptable.
			  ///
			  /// @param input      Samples of this piece.
			  /// @param input_dim  Number of samples in `input`.
			  /// @param flush      True for the final piece of a signal.
			  /// @param output     Receives the resampled samples.
			  void Resample(const float *input, int32_t input_dim, bool flush,
			                std::vector<float> *output);

			  /// Accessors for the configured sampling rates (useful for checks).
			  int32_t GetInputSamplingRate() const { return samp_rate_in_; }
			  int32_t GetOutputSamplingRate() const { return samp_rate_out_; }

			 private:
			  void SetIndexesAndWeights();

			  float FilterFunc(float t) const;

			  /// Number of output samples produced for a signal of `input_num_samp`
			  /// input samples.
			  ///
			  /// With flush == true this is the largest n for which
			  /// (n / samp_rate_out_) lies in the half-open interval
			  /// [ 0, input_num_samp / samp_rate_in_ ). With flush == false, let
			  /// window_width = num_zeros / (2.0 * filter_cutoff_); the interval then
			  /// becomes [ 0, input_num_samp / samp_rate_in_ - window_width ).
			  int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;

			  /// For output-sample index `samp_out`, writes to *first_samp_in the
			  /// first input-sample index that carries a weight (possibly negative)
			  /// and to *samp_out_wrapped the index into weights_ holding the
			  /// matching weights on the input.
			  inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
			                         int32_t *samp_out_wrapped) const;

			  void SetRemainder(const float *input, int32_t input_dim);

			  // ---- Settings supplied by the user. ----
			  int32_t samp_rate_in_;
			  int32_t samp_rate_out_;
			  float filter_cutoff_;
			  int32_t num_zeros_;

			  /// Input samples in the smallest repeating unit:
			  /// samp_rate_in_hz / Gcd(samp_rate_in_hz, samp_rate_out_hz).
			  int32_t input_samples_in_unit_;

			  /// Output samples in the smallest repeating unit:
			  /// samp_rate_out_hz / Gcd(samp_rate_in_hz, samp_rate_out_hz).
			  int32_t output_samples_in_unit_;

			  /// Per output-sample index, the first input-sample index summed over.
			  /// Values may be negative; truncation at the start of the signal is
			  /// dealt with separately. Only the first few output samples are
			  /// stored, from which the index for any output sample can be
			  /// extrapolated.
			  std::vector<int32_t> first_index_;

			  /// Per output-sample index, the weights applied to the input samples.
			  std::vector<std::vector<float>> weights_;

			  // ---- Position within the current signal, for the case where it is
			  // ---- supplied over several Resample() calls.

			  /// Input samples received so far for this signal (including anything
			  /// held in the remainder).
			  int64_t input_sample_offset_;

			  /// Samples output so far for this signal.
			  int64_t output_sample_offset_;

			  /// A short trailing section of the input seen previously.
			  std::vector<float> input_remainder_;
			};