From 948b68774cebf2b9a2994b7b9b8102f9637a98f3 Mon Sep 17 00:00:00 2001
From: Shi Xian <40013335+R1ckShi@users.noreply.github.com>
Date: Tue, 16 Jan 2024 11:03:55 +0800
Subject: [PATCH] Merge pull request #1249 from alibaba-damo-academy/main

---
 runtime/onnxruntime/bin/funasr-onnx-2pass.cpp        |    8 
 runtime/websocket/bin/websocket-server.cpp           |   11 
 runtime/websocket/bin/websocket-server-2pass.h       |    8 
 runtime/websocket/bin/websocket-server.h             |    4 
 runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp  |   18 -
 runtime/websocket/bin/websocket-server-2pass.cpp     |    8 
 runtime/run_server.sh                                |   11 
 runtime/onnxruntime/src/audio.cpp                    |  233 ++++++++++------------
 runtime/onnxruntime/src/tokenizer.cpp                |    4 
 runtime/onnxruntime/bin/funasr-onnx-offline.cpp      |   16 
 runtime/onnxruntime/src/funasrruntime.cpp            |   10 
 runtime/onnxruntime/src/bias-lm.h                    |    8 
 runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp |   10 
 runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp   |   10 
 runtime/onnxruntime/src/alignedmem.cpp               |    4 
 README_zh.md                                         |   36 +-
 runtime/tools/utils/parse_options.sh                 |   97 +++++++++
 README.md                                            |    8 
 runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp   |   12 
 runtime/onnxruntime/src/encode_converter.cpp         |   14 
 runtime/onnxruntime/src/encode_converter.h           |    6 
 runtime/run_server_2pass.sh                          |    2 
 runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp    |    8 
 runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp  |   10 
 runtime/onnxruntime/bin/funasr-onnx-online-punc.cpp  |   10 
 runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp   |   10 
 26 files changed, 325 insertions(+), 251 deletions(-)

diff --git a/README.md b/README.md
index 05f6364..a53ce4d 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,6 @@
 |                paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary)  [🤗]() )                | speech recognition with speaker diarization, with timestamps, non-streaming |      60000 hours, Mandarin       |    220M    |
 | <nobr>paraformer-zh-online <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )</nobr> |                        speech recognition, streaming                        |      60000 hours, Mandarin       |    220M    |
 |         paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )         |             speech recognition, with timestamps, non-streaming              |       50000 hours, English       |    220M    |
-|                                                               paraformer-en-spk <br> ([⭐]()[🤗]()  )                                                               |         speech recognition with speaker diarization, non-streaming          |               Undo               |    Undo    |
 |                     conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() )                      |                      speech recognition, non-streaming                      |       50000 hours, English       |    220M    |
 |                     ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() )                      |                           punctuation restoration                           |    100M, Mandarin and English    |    1.1G    | 
 |                          fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() )                          |                          voice activity detection                           | 5000 hours, Mandarin and English |    0.4M    | 
@@ -97,7 +96,7 @@
                   punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
                   spk_model="cam++", spk_model_revision="v2.0.2")
 res = model(input=f"{model.model_path}/example/asr_example.wav", 
-            batch_size=16, 
+            batch_size=64, 
             hotword='魔搭')
 print(res)
 ```
@@ -135,7 +134,6 @@
 from funasr import AutoModel
 
 model = AutoModel(model="fsmn-vad", model_revision="v2.0.2")
-
 wav_file = f"{model.model_path}/example/asr_example.wav"
 res = model(input=wav_file)
 print(res)
@@ -167,7 +165,6 @@
 from funasr import AutoModel
 
 model = AutoModel(model="ct-punc", model_revision="v2.0.2")
-
 res = model(input="那今天的会就到这里吧 happy new year 明年见")
 print(res)
 ```
@@ -176,9 +173,8 @@
 from funasr import AutoModel
 
 model = AutoModel(model="fa-zh", model_revision="v2.0.2")
-
 wav_file = f"{model.model_path}/example/asr_example.wav"
-text_file = f"{model.model_path}/example/asr_example.wav"
+text_file = f"{model.model_path}/example/text.txt"
 res = model(input=(wav_file, text_file), data_type=("sound", "text"))
 print(res)
 ```
diff --git a/README_zh.md b/README_zh.md
index 62d251b..861e61c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -60,14 +60,13 @@
 |                                                                             模型名字                                                                             |        任务详情        |     训练数据     | 参数量  |
 |:------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------:|:------------:|:----:|
 | paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)  [🤗]() ) |  语音识别，带时间戳输出，非实时   |  60000小时，中文  | 220M |
-|             paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary)  [🤗]() )             | 分角色语音识别，带时间戳输出，非实时 |  60000小时，中文  | 220M |
-|   paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )   |      语音识别，实时       |  60000小时，中文  | 220M |
-|      paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )      | 语音识别，非实时 |  50000小时，英文  | 220M |
-|                                                            paraformer-en-spk <br> ([⭐]() [🤗]() )                                                            |      语音识别，非实时      |  50000小时，英文  | 220M |
-|                  conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() )                   |      语音识别，非实时      |  50000小时，英文  | 220M |
-|                  ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() )                   |      标点恢复      |  100M，中文与英文  | 1.1G | 
-|                       fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() )                       |     语音端点检测，实时      | 5000小时，中文与英文 | 0.4M | 
-|                       fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() )                        |   字级别时间戳预测         |  50000小时，中文  | 38M  |
+| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary)  [🤗]() )             | 分角色语音识别，带时间戳输出，非实时 |  60000小时，中文  | 220M |
+| paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )   |      语音识别，实时       |  60000小时，中文  | 220M |
+| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )      | 语音识别，非实时 |  50000小时，英文  | 220M |
+| conformer-en <br> ( [⭐](https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [🤗]() )                   |      语音识别，非实时      |  50000小时，英文  | 220M |
+| ct-punc <br> ( [⭐](https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [🤗]() )                   |      标点恢复      |  100M，中文与英文  | 1.1G | 
+| fsmn-vad <br> ( [⭐](https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [🤗]() )                       |     语音端点检测，实时      | 5000小时，中文与英文 | 0.4M | 
+| fa-zh <br> ( [⭐](https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [🤗]() )                        |   字级别时间戳预测         |  50000小时，中文  | 38M  |
 
 
 <a name="快速开始"></a>
@@ -86,12 +85,15 @@
 ### 非实时语音识别
 ```python
 from funasr import AutoModel
-
-model = AutoModel(model="paraformer-zh")
-# for the long duration wav, you could add vad model
-# model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc")
-
-res = model(input="asr_example_zh.wav", batch_size=64)
+# paraformer-zh is a multi-functional asr model
+# use vad, punc, spk or not as you need
+model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
+                  vad_model="fsmn-vad", vad_model_revision="v2.0.2", \
+                  punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
+                  spk_model="cam++", spk_model_revision="v2.0.2")
+res = model(input=f"{model.model_path}/example/asr_example.wav", 
+            batch_size=64, 
+            hotword='魔搭')
 print(res)
 ```
 娉細`model_hub`锛氳〃绀烘ā鍨嬩粨搴擄紝`ms`涓洪�夋嫨modelscope涓嬭浇锛宍hf`涓洪�夋嫨huggingface涓嬭浇銆�
@@ -105,7 +107,7 @@
 encoder_chunk_look_back = 4 #number of chunks to lookback for encoder self-attention
 decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention
 
-model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.0")
+model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.2")
 
 import soundfile
 import os
@@ -163,7 +165,7 @@
 ```python
 from funasr import AutoModel
 
-model = AutoModel(model="ct-punc", model_revision="v2.0.1")
+model = AutoModel(model="ct-punc", model_revision="v2.0.2")
 
 res = model(input="那今天的会就到这里吧 happy new year 明年见")
 print(res)
@@ -176,7 +178,7 @@
 model = AutoModel(model="fa-zh", model_revision="v2.0.0")
 
 wav_file = f"{model.model_path}/example/asr_example.wav"
-text_file = f"{model.model_path}/example/asr_example.wav"
+text_file = f"{model.model_path}/example/text.txt"
 res = model(input=(wav_file, text_file), data_type=("sound", "text"))
 print(res)
 ```
diff --git a/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
index d49ba72..d4abacd 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
@@ -144,10 +144,10 @@
                 } else {
                     is_final = false;
             }
-            gettimeofday(&start, NULL);
+            gettimeofday(&start, nullptr);
             FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final, 
                                                         sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
-            gettimeofday(&end, NULL);
+            gettimeofday(&end, nullptr);
             seconds = (end.tv_sec - start.tv_sec);
             long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
             n_total_time += taking_micros;
@@ -272,7 +272,7 @@
     GetValue(asr_mode, ASR_MODE, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = onnx_thread.getValue();
     int asr_mode_ = -1;
     if(model_path[ASR_MODE] == "offline"){
@@ -301,7 +301,7 @@
         am_sc = am_scale.getValue();
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
diff --git a/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp b/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
index abcc4b2..6941151 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
@@ -97,7 +97,7 @@
     GetValue(asr_mode, ASR_MODE, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = onnx_thread.getValue();
     int asr_mode_ = -1;
     if(model_path[ASR_MODE] == "offline"){
@@ -128,7 +128,7 @@
     // init wfst decoder
     FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, glob_beam, lat_beam, am_sc);
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -214,11 +214,11 @@
                 } else {
                     is_final = false;
             }
-            gettimeofday(&start, NULL);
+            gettimeofday(&start, nullptr);
             FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, 
                 speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm", 
                 (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
-            gettimeofday(&end, NULL);
+            gettimeofday(&end, nullptr);
             seconds = (end.tv_sec - start.tv_sec);
             taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
 
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp
index 8f3149e..f549aae 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline-punc.cpp
@@ -49,7 +49,7 @@
     GetValue(txt_path, TXT_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num);
 
@@ -59,7 +59,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -83,9 +83,9 @@
     
     long taking_micros = 0;
     for(auto& txt_str : txt_list){
-        gettimeofday(&start, NULL);
-        FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, NULL);
-        gettimeofday(&end, NULL);
+        gettimeofday(&start, nullptr);
+        FUNASR_RESULT result=CTTransformerInfer(punc_hanlde, txt_str.c_str(), RASR_NONE, nullptr);
+        gettimeofday(&end, nullptr);
         seconds = (end.tv_sec - start.tv_sec);
         taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
         string msg = FunASRGetResult(result, 0);
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
index 83d7e79..5722693 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
@@ -54,7 +54,7 @@
     // warm up
     for (size_t i = 0; i < 1; i++)
     {
-        FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
+        FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs, true, decoder_handle);
         if(result){
             FunASRFreeResult(result);
         }
@@ -67,10 +67,10 @@
             break;
         }
 
-        gettimeofday(&start, NULL);
-        FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
+        gettimeofday(&start, nullptr);
+        FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs, true, decoder_handle);
 
-        gettimeofday(&end, NULL);
+        gettimeofday(&end, nullptr);
         seconds = (end.tv_sec - start.tv_sec);
         long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
         n_total_time += taking_micros;
@@ -115,10 +115,8 @@
 
 void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
 {
-    if (value_arg.isSet()){
-        model_path.insert({key, value_arg.getValue()});
-        LOG(INFO)<< key << " : " << value_arg.getValue();
-    }
+    model_path.insert({key, value_arg.getValue()});
+    LOG(INFO)<< key << " : " << value_arg.getValue();
 }
 
 int main(int argc, char *argv[])
@@ -176,7 +174,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     FUNASR_HANDLE asr_handle=FunOfflineInit(model_path, 1);
 
     if (!asr_handle)
@@ -185,7 +183,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
index 950530a..d0cb2ee 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
@@ -82,7 +82,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
 
@@ -92,7 +92,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -132,9 +132,9 @@
     for (int i = 0; i < wav_list.size(); i++) {
         auto& wav_file = wav_list[i];
         auto& wav_id = wav_ids[i];
-        gettimeofday(&start, NULL);
-        FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, audio_fs.getValue());
-        gettimeofday(&end, NULL);
+        gettimeofday(&start, nullptr);
+        FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), nullptr, audio_fs.getValue());
+        gettimeofday(&end, nullptr);
         seconds = (end.tv_sec - start.tv_sec);
         taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
 
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
index 4aaa002..edb83bd 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
@@ -32,10 +32,8 @@
 
 void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
 {
-    if (value_arg.isSet()){
-        model_path.insert({key, value_arg.getValue()});
-        LOG(INFO)<< key << " : " << value_arg.getValue();
-    }
+    model_path.insert({key, value_arg.getValue()});
+    LOG(INFO)<< key << " : " << value_arg.getValue();
 }
 
 int main(int argc, char** argv)
@@ -89,7 +87,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE asr_hanlde=FunOfflineInit(model_path, thread_num);
 
@@ -116,7 +114,7 @@
     LOG(INFO) << "hotword path: " << hotword_path;
     funasr::ExtractHws(hotword_path, hws_map, nn_hotwords_);
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -158,9 +156,9 @@
     for (int i = 0; i < wav_list.size(); i++) {
         auto& wav_file = wav_list[i];
         auto& wav_id = wav_ids[i];
-        gettimeofday(&start, NULL);
-        FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs.getValue(), true, decoder_handle);
-        gettimeofday(&end, NULL);
+        gettimeofday(&start, nullptr);
+        FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, nullptr, hotwords_embedding, audio_fs.getValue(), true, decoder_handle);
+        gettimeofday(&end, nullptr);
         seconds = (end.tv_sec - start.tv_sec);
         taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
 
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
index 72be6a1..1593e84 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
@@ -63,7 +63,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE asr_handle=FunASRInit(model_path, thread_num, ASR_ONLINE);
 
@@ -73,7 +73,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -144,9 +144,9 @@
                 } else {
                     is_final = false;
             }
-            gettimeofday(&start, NULL);
-            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
-            gettimeofday(&end, NULL);
+            gettimeofday(&start, nullptr);
+            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
+            gettimeofday(&end, nullptr);
             seconds = (end.tv_sec - start.tv_sec);
             taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
 
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-punc.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-punc.cpp
index e7a024c..5616fa4 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-punc.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-punc.cpp
@@ -69,7 +69,7 @@
     GetValue(txt_path, TXT_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE punc_hanlde=CTTransformerInit(model_path, thread_num, PUNC_ONLINE);
 
@@ -79,7 +79,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -107,16 +107,16 @@
         splitString(vad_strs, txt_str, "|");
         string str_out;
         FUNASR_RESULT result = nullptr;
-        gettimeofday(&start, NULL);
+        gettimeofday(&start, nullptr);
         for(auto& vad_str:vad_strs){
-            result=CTTransformerInfer(punc_hanlde, vad_str.c_str(), RASR_NONE, NULL, PUNC_ONLINE, result);
+            result=CTTransformerInfer(punc_hanlde, vad_str.c_str(), RASR_NONE, nullptr, PUNC_ONLINE, result);
             if(result){
                 string msg = CTTransformerGetResult(result, 0);
                 str_out += msg;
                 LOG(INFO)<<"Online result: "<<msg;
             }
         }
-        gettimeofday(&end, NULL);
+        gettimeofday(&end, nullptr);
         seconds = (end.tv_sec - start.tv_sec);
         taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
         LOG(INFO)<<"Results: "<<str_out;
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
index 57a4cce..4830a3d 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
@@ -84,7 +84,7 @@
                 } else {
                     is_final = false;
             }
-            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
+            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
             if (result)
             {
                 FunASRFreeResult(result);
@@ -130,9 +130,9 @@
                 } else {
                     is_final = false;
             }
-            gettimeofday(&start, NULL);
-            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
-            gettimeofday(&end, NULL);
+            gettimeofday(&start, nullptr);
+            FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, nullptr, is_final, sampling_rate_);
+            gettimeofday(&end, nullptr);
             seconds = (end.tv_sec - start.tv_sec);
             long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
             n_total_time += taking_micros;
@@ -210,7 +210,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1, ASR_ONLINE);
 
     if (!asr_handle)
@@ -219,7 +219,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
index 3f62672..6633dfe 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -89,7 +89,7 @@
     GetValue(wav_path, WAV_PATH, model_path);
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     int thread_num = 1;
     FUNASR_HANDLE vad_hanlde=FsmnVadInit(model_path, thread_num);
 
@@ -99,7 +99,7 @@
         exit(-1);
     }
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
@@ -170,9 +170,9 @@
                 } else {
                     is_final = false;
             }
-            gettimeofday(&start, NULL);
-            FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, sampling_rate_);
-            gettimeofday(&end, NULL);
+            gettimeofday(&start, nullptr);
+            FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, nullptr, is_final, sampling_rate_);
+            gettimeofday(&end, nullptr);
             seconds = (end.tv_sec - start.tv_sec);
             taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
 
diff --git a/runtime/onnxruntime/src/alignedmem.cpp b/runtime/onnxruntime/src/alignedmem.cpp
index 9c7d323..b72c623 100644
--- a/runtime/onnxruntime/src/alignedmem.cpp
+++ b/runtime/onnxruntime/src/alignedmem.cpp
@@ -6,8 +6,8 @@
     void *p1;  // original block
     void **p2; // aligned block
     int offset = alignment - 1 + sizeof(void *);
-    if ((p1 = (void *)malloc(required_bytes + offset)) == NULL) {
-        return NULL;
+    if ((p1 = (void *)malloc(required_bytes + offset)) == nullptr) {
+        return nullptr;
     }
     p2 = (void **)(((size_t)(p1) + offset) & ~(alignment - 1));
     p2[-1] = p1;
diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp
index c471329..6f829cc 100644
--- a/runtime/onnxruntime/src/audio.cpp
+++ b/runtime/onnxruntime/src/audio.cpp
@@ -133,6 +133,7 @@
     };
     ~AudioWindow(){
         free(window);
+        window = nullptr;
     };
     int put(int val)
     {
@@ -160,8 +161,9 @@
     len = end - start;
 }
 AudioFrame::~AudioFrame(){
-    if(data != NULL){
+    if(data != nullptr){
         free(data);
+        data = nullptr;
     }
 }
 int AudioFrame::SetStart(int val)
@@ -195,38 +197,41 @@
 
 Audio::Audio(int data_type) : dest_sample_rate(MODEL_SAMPLE_RATE), data_type(data_type)
 {
-    speech_buff = NULL;
-    speech_data = NULL;
+    speech_buff = nullptr;
+    speech_data = nullptr;
     align_size = 1360;
     seg_sample = dest_sample_rate / 1000;
 }
 
 Audio::Audio(int model_sample_rate, int data_type) : dest_sample_rate(model_sample_rate), data_type(data_type)
 {
-    speech_buff = NULL;
-    speech_data = NULL;
+    speech_buff = nullptr;
+    speech_data = nullptr;
     align_size = 1360;
     seg_sample = dest_sample_rate / 1000;
 }
 
 Audio::Audio(int model_sample_rate, int data_type, int size) : dest_sample_rate(model_sample_rate), data_type(data_type)
 {
-    speech_buff = NULL;
-    speech_data = NULL;
+    speech_buff = nullptr;
+    speech_data = nullptr;
     align_size = (float)size;
     seg_sample = dest_sample_rate / 1000;
 }
 
 Audio::~Audio()
 {
-    if (speech_buff != NULL) {
+    if (speech_buff != nullptr) {
         free(speech_buff);
+        speech_buff = nullptr;
     }
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_char != NULL) {
+    if (speech_char != nullptr) {
         free(speech_char);
+        speech_char = nullptr;
     }
     ClearQueue(frame_queue);
     ClearQueue(asr_online_queue);
@@ -269,8 +274,9 @@
     resampler->Resample(waveform, n, true, &samples);
     //reset speech_data
     speech_len = samples.size();
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
     speech_data = (float*)malloc(sizeof(float) * speech_len);
     memset(speech_data, 0, sizeof(float) * speech_len);
@@ -283,21 +289,21 @@
 #else
     // from file
     AVFormatContext* formatContext = avformat_alloc_context();
-    if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0) {
+    if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0) {
         LOG(ERROR) << "Error: Could not open input file.";
         avformat_close_input(&formatContext);
         avformat_free_context(formatContext);
         return false;
     }
 
-    if (avformat_find_stream_info(formatContext, NULL) < 0) {
+    if (avformat_find_stream_info(formatContext, nullptr) < 0) {
         LOG(ERROR) << "Error: Could not open input file.";
         avformat_close_input(&formatContext);
         avformat_free_context(formatContext);
         return false;
     }
-    const AVCodec* codec = NULL;
-    AVCodecParameters* codecParameters = NULL;
+    const AVCodec* codec = nullptr;
+    AVCodecParameters* codecParameters = nullptr;
     int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
     if (audioStreamIndex >= 0) {
         codecParameters = formatContext->streams[audioStreamIndex]->codecpar;
@@ -321,7 +327,7 @@
         avcodec_free_context(&codecContext);
         return false;
     }
-    if (avcodec_open2(codecContext, codec, NULL) < 0) {
+    if (avcodec_open2(codecContext, codec, nullptr) < 0) {
         LOG(ERROR) << "Error: Could not open audio decoder.";
         avformat_close_input(&formatContext);
         avformat_free_context(formatContext);
@@ -400,14 +406,13 @@
     av_packet_free(&packet);
     av_frame_free(&frame);
 
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
-        free(speech_buff);
-    }
-    if (speech_char != NULL) {
+    if (speech_char != nullptr) {
         free(speech_char);
+        speech_char = nullptr;
     }
     offset = 0;
     
@@ -418,30 +423,25 @@
     }
 
     speech_len = (resampled_buffers.size()) / 2;
-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
-    if (speech_buff)
-    {
-        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
-        memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
-
-        speech_data = (float*)malloc(sizeof(float) * speech_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if(speech_data){
         memset(speech_data, 0, sizeof(float) * speech_len);
-
         float scale = 1;
         if (data_type == 1) {
-            scale = 32768;
+            scale = 32768.0f;
         }
-        for (int32_t i = 0; i != speech_len; ++i) {
-            speech_data[i] = (float)speech_buff[i] / scale;
+        for (int32_t i = 0; i < speech_len; ++i) {
+            int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
+            speech_data[i] = (float)val / scale;
         }
-
         AudioFrame* frame = new AudioFrame(speech_len);
         frame_queue.push(frame);
     
         return true;
-    }
-    else
+    }else{
         return false;
+    }
+
 #endif
 }
 
@@ -468,7 +468,7 @@
     }
     AVFormatContext* formatContext = avformat_alloc_context();
     formatContext->pb = avio_ctx;
-    if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
+    if (avformat_open_input(&formatContext, "", nullptr, nullptr) != 0) {
         LOG(ERROR) << "Error: Could not open input file.";
         avio_context_free(&avio_ctx);
         avformat_close_input(&formatContext);
@@ -476,15 +476,15 @@
         return false;
     }
 
-    if (avformat_find_stream_info(formatContext, NULL) < 0) {
+    if (avformat_find_stream_info(formatContext, nullptr) < 0) {
         LOG(ERROR) << "Error: Could not find stream information.";
         avio_context_free(&avio_ctx);
         avformat_close_input(&formatContext);
         avformat_free_context(formatContext);
         return false;
     }
-    const AVCodec* codec = NULL;
-    AVCodecParameters* codecParameters = NULL;
+    const AVCodec* codec = nullptr;
+    AVCodecParameters* codecParameters = nullptr;
     int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
     if (audioStreamIndex >= 0) {
         codecParameters = formatContext->streams[audioStreamIndex]->codecpar;
@@ -505,7 +505,7 @@
         avcodec_free_context(&codecContext);
         return false;
     }
-    if (avcodec_open2(codecContext, codec, NULL) < 0) {
+    if (avcodec_open2(codecContext, codec, nullptr) < 0) {
         LOG(ERROR) << "Error: Could not open audio decoder.";
         avio_context_free(&avio_ctx);
         avformat_close_input(&formatContext);
@@ -590,39 +590,31 @@
     av_packet_free(&packet);
     av_frame_free(&frame);
 
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
-        free(speech_buff);
-    }
-    offset = 0;
 
     speech_len = (resampled_buffers.size()) / 2;
-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
-    if (speech_buff)
-    {
-        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
-        memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
-
-        speech_data = (float*)malloc(sizeof(float) * speech_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if(speech_data){
         memset(speech_data, 0, sizeof(float) * speech_len);
-
         float scale = 1;
         if (data_type == 1) {
-            scale = 32768;
+            scale = 32768.0f;
         }
-        for (int32_t i = 0; i != speech_len; ++i) {
-            speech_data[i] = (float)speech_buff[i] / scale;
+        for (int32_t i = 0; i < speech_len; ++i) {
+            int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
+            speech_data[i] = (float)val / scale;
         }
-
         AudioFrame* frame = new AudioFrame(speech_len);
         frame_queue.push(frame);
     
         return true;
-    }
-    else
+    }else{
         return false;
+    }
+
 #endif
 }
 
@@ -630,11 +622,13 @@
 bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
 {
     WaveHeader header;
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
+    if (speech_buff != nullptr) {
         free(speech_buff);
+        speech_buff = nullptr;
     }
     
     offset = 0;
@@ -705,8 +699,9 @@
 bool Audio::LoadWav2Char(const char *filename, int32_t* sampling_rate)
 {
     WaveHeader header;
-    if (speech_char != NULL) {
+    if (speech_char != nullptr) {
         free(speech_char);
+        speech_char = nullptr;
     }
     offset = 0;
     std::ifstream is(filename, std::ifstream::binary);
@@ -744,13 +739,14 @@
 bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate)
 { 
     WaveHeader header;
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
+    if (speech_buff != nullptr) {
         free(speech_buff);
+        speech_buff = nullptr;
     }
-    offset = 0;
 
     std::memcpy(&header, buf, sizeof(header));
 
@@ -790,33 +786,24 @@
 
 bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
 {
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
-        free(speech_buff);
-    }
-    offset = 0;
 
     speech_len = n_buf_len / 2;
-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
-    if (speech_buff)
-    {
-        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
-        memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
-
-        speech_data = (float*)malloc(sizeof(float) * speech_len);
-        memset(speech_data, 0, sizeof(float) * speech_len);
-
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if(speech_data){
         float scale = 1;
         if (data_type == 1) {
-            scale = 32768;
+            scale = 32768.0f;
+        }
+        const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
+        for (int32_t i = 0; i < speech_len; ++i) {
+            int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
+            speech_data[i] = (float)val / scale;
         }
 
-        for (int32_t i = 0; i != speech_len; ++i) {
-            speech_data[i] = (float)speech_buff[i] / scale;
-        }
-        
         //resample
         if(*sampling_rate != dest_sample_rate){
             WavResample(*sampling_rate, speech_data, speech_len);
@@ -824,44 +811,33 @@
 
         AudioFrame* frame = new AudioFrame(speech_len);
         frame_queue.push(frame);
+    
         return true;
-
-    }
-    else
+    }else{
         return false;
+    }
 }
 
 bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_rate)
 {
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
-    }
-    if (speech_buff != NULL) {
-        free(speech_buff);
-    }
-    if (speech_char != NULL) {
-        free(speech_char);
+        speech_data = nullptr;
     }
 
     speech_len = n_buf_len / 2;
-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
-    if (speech_buff)
-    {
-        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
-        memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
-
-        speech_data = (float*)malloc(sizeof(float) * speech_len);
-        memset(speech_data, 0, sizeof(float) * speech_len);
-
+    speech_data = (float*)malloc(sizeof(float) * speech_len);
+    if(speech_data){
         float scale = 1;
         if (data_type == 1) {
-            scale = 32768;
+            scale = 32768.0f;
+        }
+        const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
+        for (int32_t i = 0; i < speech_len; ++i) {
+            int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
+            speech_data[i] = (float)val / scale;
         }
 
-        for (int32_t i = 0; i != speech_len; ++i) {
-            speech_data[i] = (float)speech_buff[i] / scale;
-        }
-        
         //resample
         if(*sampling_rate != dest_sample_rate){
             WavResample(*sampling_rate, speech_data, speech_len);
@@ -873,20 +849,22 @@
 
         AudioFrame* frame = new AudioFrame(speech_len);
         frame_queue.push(frame);
+    
         return true;
-
-    }
-    else
+    }else{
         return false;
+    }
 }
 
 bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
 {
-    if (speech_data != NULL) {
+    if (speech_data != nullptr) {
         free(speech_data);
+        speech_data = nullptr;
     }
-    if (speech_buff != NULL) {
+    if (speech_buff != nullptr) {
         free(speech_buff);
+        speech_buff = nullptr;
     }
     offset = 0;
 
@@ -937,8 +915,9 @@
 
 bool Audio::LoadPcmwav2Char(const char* filename, int32_t* sampling_rate)
 {
-    if (speech_char != NULL) {
+    if (speech_char != nullptr) {
         free(speech_char);
+        speech_char = nullptr;
     }
     offset = 0;
 
@@ -964,8 +943,9 @@
 
 bool Audio::LoadOthers2Char(const char* filename)
 {
-    if (speech_char != NULL) {
+    if (speech_char != nullptr) {
         free(speech_char);
+        speech_char = nullptr;
     }
 
     FILE* fp;
@@ -1070,6 +1050,7 @@
         new_data[tmp_off + i] = speech_data[ii];
     }
     free(speech_data);
+    speech_data = nullptr;
     speech_data = new_data;
     speech_len = num_new_samples;
 
@@ -1088,7 +1069,7 @@
     frame_queue.pop();
     int sp_len = frame->GetLen();
     delete frame;
-    frame = NULL;
+    frame = nullptr;
 
     std::vector<float> pcm_data(speech_data, speech_data+sp_len);
     vector<std::vector<int>> vad_segments = (offline_stream->vad_handle)->Infer(pcm_data);
@@ -1100,7 +1081,7 @@
         frame->SetStart(start);
         frame->SetEnd(end);
         frame_queue.push(frame);
-        frame = NULL;
+        frame = nullptr;
     }
 }
 
@@ -1112,7 +1093,7 @@
     frame_queue.pop();
     int sp_len = frame->GetLen();
     delete frame;
-    frame = NULL;
+    frame = nullptr;
 
     std::vector<float> pcm_data(speech_data, speech_data+sp_len);
     vad_segments = vad_obj->Infer(pcm_data, input_finished);
@@ -1127,7 +1108,7 @@
     frame_queue.pop();
     int sp_len = frame->GetLen();
     delete frame;
-    frame = NULL;
+    frame = nullptr;
 
     std::vector<float> pcm_data(speech_data, speech_data+sp_len);
     vector<std::vector<int>> vad_segments = vad_obj->Infer(pcm_data, input_finished);
@@ -1148,7 +1129,7 @@
                     frame->data = (float*)malloc(sizeof(float) * step);
                     memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
                     asr_online_queue.push(frame);
-                    frame = NULL;
+                    frame = nullptr;
                     speech_start += step/seg_sample;
                 }
             }
@@ -1176,7 +1157,7 @@
                     frame->data = (float*)malloc(sizeof(float) * (end-start));
                     memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
                     asr_online_queue.push(frame);
-                    frame = NULL;
+                    frame = nullptr;
                 }
 
                 if(asr_mode != ASR_ONLINE){
@@ -1187,7 +1168,7 @@
                     frame->data = (float*)malloc(sizeof(float) * (end-start));
                     memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
                     asr_offline_queue.push(frame);
-                    frame = NULL;
+                    frame = nullptr;
                 }
 
                 speech_start = -1;
@@ -1210,7 +1191,7 @@
                         frame->data = (float*)malloc(sizeof(float) * step);
                         memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
                         asr_online_queue.push(frame);
-                        frame = NULL;
+                        frame = nullptr;
                         speech_start += step/seg_sample;
                     }
                 }
@@ -1235,7 +1216,7 @@
                     frame->data = (float*)malloc(sizeof(float) * (end-offline_start));
                     memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float));
                     asr_offline_queue.push(frame);
-                    frame = NULL;
+                    frame = nullptr;
                 }
 
                 if(asr_mode != ASR_OFFLINE){
@@ -1253,7 +1234,7 @@
                             frame->data = (float*)malloc(sizeof(float) * step);
                             memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float));
                             asr_online_queue.push(frame);
-                            frame = NULL;
+                            frame = nullptr;
                         }
                     }else{
                         frame = new AudioFrame(0);
@@ -1261,7 +1242,7 @@
                         frame->global_start = speech_start;   // in this case start >= end
                         frame->global_end = speech_end_i;
                         asr_online_queue.push(frame);
-                        frame = NULL;
+                        frame = nullptr;
                     }
                 }
                 speech_start = -1;
diff --git a/runtime/onnxruntime/src/bias-lm.h b/runtime/onnxruntime/src/bias-lm.h
index 957197a..ddaf3e4 100644
--- a/runtime/onnxruntime/src/bias-lm.h
+++ b/runtime/onnxruntime/src/bias-lm.h
@@ -48,7 +48,7 @@
     std::vector<std::vector<int>> split_id_vec;
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
 
     LoadCfgFromYaml(cfg_file.c_str(), opt_);
     while (getline(ifs_hws, line)) {
@@ -86,7 +86,7 @@
     BuildGraph(split_id_vec, custom_weight);
     ifs_hws.close();
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Build bias lm takes " << (double)modle_init_micros / 1000000 << " s";
@@ -99,7 +99,7 @@
     std::vector<std::vector<int>> split_id_vec;
 
     struct timeval start, end;
-    gettimeofday(&start, NULL);
+    gettimeofday(&start, nullptr);
     opt_.incre_bias_ = inc_bias;
     for (const pair<string, int>& kv : hws_map) {
       float score = 1.0f;
@@ -128,7 +128,7 @@
     }
     BuildGraph(split_id_vec, custom_weight);
 
-    gettimeofday(&end, NULL);
+    gettimeofday(&end, nullptr);
     long seconds = (end.tv_sec - start.tv_sec);
     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
     LOG(INFO) << "Build bias lm takes " << (double)modle_init_micros / 1000000 << " s";
diff --git a/runtime/onnxruntime/src/encode_converter.cpp b/runtime/onnxruntime/src/encode_converter.cpp
index 6c1097e..2ba6109 100644
--- a/runtime/onnxruntime/src/encode_converter.cpp
+++ b/runtime/onnxruntime/src/encode_converter.cpp
@@ -441,7 +441,7 @@
 }
 
 bool EncodeConverter::IsAllChineseCharactor(const U8CHAR_T* pu8, size_t ilen) {
-    if (pu8 == NULL || ilen <= 0) {
+    if (pu8 == nullptr || ilen <= 0) {
         return false;
     }
 
@@ -458,7 +458,7 @@
 }
 
 bool EncodeConverter::HasAlpha(const U8CHAR_T* pu8, size_t ilen) {
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   for (size_t i = 0; i < ilen; i++) {
@@ -471,7 +471,7 @@
 
 
 bool EncodeConverter::IsAllAlpha(const U8CHAR_T* pu8, size_t ilen) {
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   for (size_t i = 0; i < ilen; i++) {
@@ -483,7 +483,7 @@
 }
 
 bool EncodeConverter::IsAllAlphaAndPunct(const U8CHAR_T* pu8, size_t ilen) {
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   bool flag1 = HasAlpha(pu8, ilen);
@@ -500,7 +500,7 @@
 }
 
 bool EncodeConverter::IsAllAlphaAndDigit(const U8CHAR_T* pu8, size_t ilen) {
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   bool flag1 = HasAlpha(pu8, ilen);
@@ -516,7 +516,7 @@
   return true;
 }
 bool EncodeConverter::IsAllAlphaAndDigitAndBlank(const U8CHAR_T* pu8, size_t ilen) {
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   for (size_t i = 0; i < ilen; i++) {
@@ -529,7 +529,7 @@
 bool EncodeConverter::NeedAddTailBlank(std::string str) {
   U8CHAR_T *pu8 = (U8CHAR_T*)str.data();
   size_t ilen = str.size();
-  if (pu8 == NULL || ilen <= 0) {
+  if (pu8 == nullptr || ilen <= 0) {
     return false;
   }
   if (IsAllAlpha(pu8, ilen) || IsAllAlphaAndPunct(pu8, ilen) || IsAllAlphaAndDigit(pu8, ilen)) {
diff --git a/runtime/onnxruntime/src/encode_converter.h b/runtime/onnxruntime/src/encode_converter.h
index f8d3b23..a135eb6 100644
--- a/runtime/onnxruntime/src/encode_converter.h
+++ b/runtime/onnxruntime/src/encode_converter.h
@@ -88,15 +88,15 @@
 #ifdef _MSC_VER
         // convert to the local ansi page
         static std::string UTF8ToLocaleAnsi(const std::string& strUTF8) {
-            int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
+            int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, nullptr, 0);
             unsigned short*wszGBK = new unsigned short[len + 1];
             memset(wszGBK, 0, len * 2 + 2);
             MultiByteToWideChar(CP_UTF8, 0, (LPCCH)strUTF8.c_str(), -1, (LPWSTR)wszGBK, len);
 
-            len = WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, NULL, 0, NULL, NULL);
+            len = WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, nullptr, 0, nullptr, nullptr);
             char *szGBK = new char[len + 1];
             memset(szGBK, 0, len + 1);
-            WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, szGBK, len, NULL, NULL);
+            WideCharToMultiByte(CP_ACP, 0, (LPCWCH)wszGBK, -1, szGBK, len, nullptr, nullptr);
             std::string strTemp(szGBK);
             delete[]szGBK;
             delete[]wszGBK;
diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp
index fdaf69d..0ca4ded 100644
--- a/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/runtime/onnxruntime/src/funasrruntime.cpp
@@ -480,7 +480,7 @@
 		
 		audio->Split(vad_online_handle, chunk_len, input_finished, mode);
 
-		funasr::AudioFrame* frame = NULL;
+		funasr::AudioFrame* frame = nullptr;
 		while(audio->FetchChunck(frame) > 0){
 			string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
 			if(mode == ASR_ONLINE){
@@ -504,9 +504,9 @@
 			}else if(mode == ASR_TWO_PASS){
 				p_result->msg += msg;
 			}
-			if(frame != NULL){
+			if(frame != nullptr){
 				delete frame;
-				frame = NULL;
+				frame = nullptr;
 			}
 		}
 
@@ -561,9 +561,9 @@
 			if (!(p_result->stamp).empty()){
 				p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
 			}
-			if(frame != NULL){
+			if(frame != nullptr){
 				delete frame;
-				frame = NULL;
+				frame = nullptr;
 			}
 		}
 
diff --git a/runtime/onnxruntime/src/tokenizer.cpp b/runtime/onnxruntime/src/tokenizer.cpp
index f56601a..7618282 100644
--- a/runtime/onnxruntime/src/tokenizer.cpp
+++ b/runtime/onnxruntime/src/tokenizer.cpp
@@ -53,8 +53,8 @@
 
         SetJiebaRes(jieba_dict_trie_, jieba_model_);
     }else {
-        jieba_dict_trie_ = NULL;
-        jieba_model_ = NULL;
+        jieba_dict_trie_ = nullptr;
+        jieba_model_ = nullptr;
     }
 }
 
diff --git a/runtime/run_server.sh b/runtime/run_server.sh
index aa33f0f..4e58464 100644
--- a/runtime/run_server.sh
+++ b/runtime/run_server.sh
@@ -2,20 +2,21 @@
 download_model_dir="/workspace/models"
 model_dir="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx"
 vad_dir="damo/speech_fsmn_vad_zh-cn-16k-common-onnx"
-punc_dir="damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx"
-itn_dir="thuduj12/fst_itn_zh"
-lm_dir="damo/speech_ngram_lm_zh-cn-ai-wesp-fst"
+punc_dir=""
+itn_dir=""
+lm_dir=""
 port=10095
 certfile="../../../ssl_key/server.crt"
 keyfile="../../../ssl_key/server.key"
 hotword="../../hotwords.txt"
 # set decoder_thread_num
 decoder_thread_num=$(cat /proc/cpuinfo | grep "processor"|wc -l) || { echo "Get cpuinfo failed. Set decoder_thread_num = 32"; decoder_thread_num=32; }
+decoder_thread_num=8  # NOTE(review): unconditionally overrides the cpuinfo-derived value computed on the previous line -- confirm this is intended
 multiple_io=16
 io_thread_num=$(( (decoder_thread_num + multiple_io - 1) / multiple_io ))
-model_thread_num=1
+model_thread_num=5
 
-. ../egs/aishell/transformer/utils/parse_options.sh || exit 1;
+. ./tools/utils/parse_options.sh || exit 1;
 
 if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
   certfile=""
diff --git a/runtime/run_server_2pass.sh b/runtime/run_server_2pass.sh
index 2fc5f11..1148c60 100644
--- a/runtime/run_server_2pass.sh
+++ b/runtime/run_server_2pass.sh
@@ -16,7 +16,7 @@
 io_thread_num=$(( (decoder_thread_num + multiple_io - 1) / multiple_io ))
 model_thread_num=1
 
-. ../egs/aishell/transformer/utils/parse_options.sh || exit 1;
+. ./tools/utils/parse_options.sh || exit 1;
 
 if [ -z "$certfile" ] || [ "$certfile" = "0" ]; then
   certfile=""
diff --git a/runtime/tools/utils/parse_options.sh b/runtime/tools/utils/parse_options.sh
new file mode 100755
index 0000000..71fb9e5
--- /dev/null
+++ b/runtime/tools/utils/parse_options.sh
@@ -0,0 +1,97 @@
+#!/usr/bin/env bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
+#                 Arnab Ghoshal, Karel Vesely
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+# MERCHANTABLITY OR NON-INFRINGEMENT.
+# See the Apache 2 License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Parse command-line options.
+# To be sourced by another script (as in ". parse_options.sh").
+# Option format is: --option-name arg
+# and shell variable "option_name" gets set to value "arg."
+# The exception is --help, which takes no arguments, but prints the
+# $help_message variable (if defined).
+
+
+###
+### The --config file options have lower priority to command line
+### options, so we need to import them first...
+###
+
+# Import every config file named by a "--config <file>" argument pair, in left-to-right order
+for ((argpos=1; argpos<$#; argpos++)); do
+  if [ "${!argpos}" == "--config" ]; then
+    argpos_plus1=$((argpos+1))
+    config=${!argpos_plus1}
+    [ ! -r "$config" ] && echo "$0: missing config '$config'" && exit 1
+    . "$config"  # source the config file (quoted so paths with spaces or globs are not split).
+  fi
+done
+
+
+###
+### Now we process the command line options
+###
+while true; do
+  [ -z "${1:-}" ] && break;  # break if there are no arguments
+  case "$1" in
+    # If the enclosing script is called with --help option, print the help
+    # message and exit.  Scripts should put help messages in $help_message
+    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
+      else printf "$help_message\n" 1>&2 ; fi;
+      exit 0 ;;
+    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
+      exit 1 ;;
+    # If the first command-line argument begins with "--" (e.g. --foo-bar),
+    # then work out the variable name as $name, which will equal "foo_bar".
+    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
+      # Next we test whether the variable in question is undefined-- if so it's
+      # an invalid option and we die.  Note: $0 evaluates to the name of the
+      # enclosing script.
+      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
+      # is undefined.  We then have to wrap this test inside "eval" because
+      # foo_bar is itself inside a variable ($name).
+      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
+
+      oldval="`eval echo \\$$name`";
+      # Work out whether we seem to be expecting a Boolean argument.
+      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
+        was_bool=true;
+      else
+        was_bool=false;
+      fi
+
+      # Set the variable to the right value-- the escaped quotes make it work if
+      # the option had spaces, like --cmd "queue.pl -sync y"
+      eval $name=\"$2\";
+
+      # Check that Boolean-valued arguments are really Boolean.
+      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
+        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
+        exit 1;
+      fi
+      shift 2;
+      ;;
+  *) break;
+  esac
+done
+
+
+# Check for an empty argument to the --cmd option, which can easily occur as a
+# result of scripting errors.
+[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
+
+
+true; # so this script returns exit code 0.
diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp
index c251e1d..954ffae 100644
--- a/runtime/websocket/bin/websocket-server-2pass.cpp
+++ b/runtime/websocket/bin/websocket-server-2pass.cpp
@@ -409,7 +409,7 @@
       }
 
       // hotwords: fst/nn
-      if(msg_data->hotwords_embedding == NULL){
+      if(msg_data->hotwords_embedding == nullptr){
         std::unordered_map<std::string, int> merged_hws_map;
         std::string nn_hotwords = "";
 
@@ -458,7 +458,7 @@
         msg_data->msg["audio_fs"] = jsonresult["audio_fs"];
       }
       if (jsonresult.contains("chunk_size")) {
-        if (msg_data->tpass_online_handle == NULL) {
+        if (msg_data->tpass_online_handle == nullptr) {
           std::vector<int> chunk_size_vec =
               jsonresult["chunk_size"].get<std::vector<int>>();
           // check chunk_size_vec
@@ -480,7 +480,7 @@
       if ((jsonresult["is_speaking"] == false ||
           jsonresult["is_finished"] == true) && 
           msg_data->msg["is_eof"] != true &&
-          msg_data->hotwords_embedding != NULL) {
+          msg_data->hotwords_embedding != nullptr) {
         LOG(INFO) << "client done";
 
         // if it is in final message, post the sample_data to decode
@@ -532,7 +532,7 @@
 
           try{
             // post to decode
-            if (msg_data->msg["is_eof"] != true && msg_data->hotwords_embedding != NULL) {
+            if (msg_data->msg["is_eof"] != true && msg_data->hotwords_embedding != nullptr) {
               std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
               msg_data->strand_->post(
                         std::bind(&WebSocketServer::do_decoder, this,
diff --git a/runtime/websocket/bin/websocket-server-2pass.h b/runtime/websocket/bin/websocket-server-2pass.h
index 6b2ba32..7938f88 100644
--- a/runtime/websocket/bin/websocket-server-2pass.h
+++ b/runtime/websocket/bin/websocket-server-2pass.h
@@ -55,13 +55,13 @@
   nlohmann::json msg;
   std::shared_ptr<std::vector<char>> samples;
   std::shared_ptr<std::vector<std::vector<std::string>>> punc_cache;
-  std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=NULL;
+  std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=nullptr;
   std::shared_ptr<websocketpp::lib::mutex> thread_lock; // lock for each connection
-  FUNASR_HANDLE tpass_online_handle=NULL;
+  FUNASR_HANDLE tpass_online_handle=nullptr;
   std::string online_res = "";
   std::string tpass_res = "";
   std::shared_ptr<asio::io_context::strand>  strand_; // for data execute in order
-  FUNASR_DEC_HANDLE decoder_handle=NULL; 
+  FUNASR_DEC_HANDLE decoder_handle=nullptr; 
 } FUNASR_MESSAGE;
 
 // See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about
@@ -139,7 +139,7 @@
   asio::io_context& io_decoder_;  // threads for asr decoder
   // std::ofstream fout;
   // FUNASR_HANDLE asr_handle;  // asr engine handle
-  FUNASR_HANDLE tpass_handle=NULL;
+  FUNASR_HANDLE tpass_handle=nullptr;
   bool isonline = true;  // online or offline engine, now only support offline
   bool is_ssl = true;
   server* server_;          // websocket server
diff --git a/runtime/websocket/bin/websocket-server.cpp b/runtime/websocket/bin/websocket-server.cpp
index 026954f..ed25c95 100644
--- a/runtime/websocket/bin/websocket-server.cpp
+++ b/runtime/websocket/bin/websocket-server.cpp
@@ -77,15 +77,16 @@
       std::string stamp_sents="";
       try{
         FUNASR_RESULT Result = FunOfflineInferBuffer(
-            asr_handle, buffer.data(), buffer.size(), RASR_NONE, NULL, 
+            asr_handle, buffer.data(), buffer.size(), RASR_NONE, nullptr, 
             hotwords_embedding, audio_fs, wav_format, itn, decoder_handle);
-        if (Result != NULL){
+        if (Result != nullptr){
           asr_result = FunASRGetResult(Result, 0);  // get decode result
           stamp_res = FunASRGetStamp(Result);
           stamp_sents = FunASRGetStampSents(Result);
           FunASRFreeResult(Result);
         } else{
-          LOG(ERROR) << "FUNASR_RESULT is NULL.";
+          std::this_thread::sleep_for(std::chrono::milliseconds(20));
+          LOG(ERROR) << "FUNASR_RESULT is nullptr.";
         }
       }catch (std::exception const& e) {
         LOG(ERROR) << e.what();
@@ -306,7 +307,7 @@
       }
 
       // hotwords: fst/nn
-      if(msg_data->hotwords_embedding == NULL){
+      if(msg_data->hotwords_embedding == nullptr){
         std::unordered_map<std::string, int> merged_hws_map;
         std::string nn_hotwords = "";
 
@@ -359,7 +360,7 @@
       if ((jsonresult["is_speaking"] == false ||
           jsonresult["is_finished"] == true) && 
           msg_data->msg["is_eof"] != true && 
-          msg_data->hotwords_embedding != NULL) {
+          msg_data->hotwords_embedding != nullptr) {
         LOG(INFO) << "client done";
         // for offline, send all receive data to decoder engine
         std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
diff --git a/runtime/websocket/bin/websocket-server.h b/runtime/websocket/bin/websocket-server.h
index cacf12d..d18bcab 100644
--- a/runtime/websocket/bin/websocket-server.h
+++ b/runtime/websocket/bin/websocket-server.h
@@ -58,9 +58,9 @@
 typedef struct {
   nlohmann::json msg;
   std::shared_ptr<std::vector<char>> samples;
-  std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=NULL;
+  std::shared_ptr<std::vector<std::vector<float>>> hotwords_embedding=nullptr;
   std::shared_ptr<websocketpp::lib::mutex> thread_lock; // lock for each connection
-  FUNASR_DEC_HANDLE decoder_handle=NULL;
+  FUNASR_DEC_HANDLE decoder_handle=nullptr;
 } FUNASR_MESSAGE;
 
 // See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about

--
Gitblit v1.9.1