Merge pull request #493 from alibaba-damo-academy/main
update dev_lyh
32个文件已修改
3个文件已添加
1 文件已重命名
3个文件已删除
| | |
| | | ../TEMPLATE/README.md |
| | | ../../TEMPLATE/README.md |
| New file |
| | |
| | | #!/usr/bin/env bash |
| | | |
| | | set -e |
| | | set -u |
| | | set -o pipefail |
| | | |
| | | stage=1 |
| | | stop_stage=2 |
| | | model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" |
| | | data_dir="./data/test" |
| | | output_dir="./results" |
| | | batch_size=64 |
| | | gpu_inference=true # whether to perform gpu decoding |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | njob=64 # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob |
| | | checkpoint_dir= |
| | | checkpoint_name="valid.cer_ctc.ave.pb" |
| | | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # Choose the job layout. GPU decoding runs one job per id in gpuid_list; |
| | | # CPU decoding runs njob jobs with batch_size 1 and a gpuid of -1 for every job. |
| | | # The flag is compared as a string inside [ ]: the previous form expanded the |
| | | # variable and executed its value as a command, which only worked because |
| | | # "true" and "false" happen to be commands. |
| | | if [ "${gpu_inference}" = "true" ]; then |
| | |     nj=$(echo "${gpuid_list}" | awk -F "," '{print NF}') |
| | | else |
| | |     nj=${njob} |
| | |     batch_size=1 |
| | |     gpuid_list="" |
| | |     for JOB in $(seq "${nj}"); do |
| | |         gpuid_list="${gpuid_list}-1," |
| | |     done |
| | | fi |
| | | |
| | | # Build one per-job list path (wav.1.scp ... wav.$nj.scp) under $output_dir/split |
| | | # and hand them, together with the input wav.scp, to utils/split_scp.pl. |
| | | # NOTE(review): split_scp.pl presumably distributes the entries across the given outputs - confirm. |
| | | mkdir -p $output_dir/split |
| | | split_scps="" |
| | | for JOB in $(seq ${nj}); do |
| | | split_scps="$split_scps $output_dir/split/wav.$JOB.scp" |
| | | done |
| | | perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps} |
| | | |
| | | # When checkpoint_dir is non-empty, run utils/prepare_checkpoint.py with the model name, |
| | | # checkpoint_dir and checkpoint_name, then decode from ${checkpoint_dir}/${model} |
| | | # instead of the original model id. |
| | | if [ -n "${checkpoint_dir}" ]; then |
| | | python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name} |
| | | model=${checkpoint_dir}/${model} |
| | | fi |
| | | |
| | | # Stage 1: run one background infer.py job per split list, then merge the per-job outputs. |
| | | if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then |
| | |     echo "Decoding ..." |
| | |     # Turn the comma-separated id list into an array indexed by JOB-1. |
| | |     gpuid_list_array=(${gpuid_list//,/ }) |
| | |     job_pids="" |
| | |     for JOB in $(seq ${nj}); do |
| | |         { |
| | |             id=$((JOB-1)) |
| | |             gpuid=${gpuid_list_array[$id]} |
| | |             mkdir -p ${output_dir}/output.$JOB |
| | |             python infer.py \ |
| | |                 --model ${model} \ |
| | |                 --audio_in ${output_dir}/split/wav.$JOB.scp \ |
| | |                 --output_dir ${output_dir}/output.$JOB \ |
| | |                 --batch_size ${batch_size} \ |
| | |                 --gpuid ${gpuid} |
| | |         }& |
| | |         job_pids="${job_pids} $!" |
| | |     done |
| | |     # A bare "wait" always returns 0, which hid failed decoding jobs from "set -e". |
| | |     # Wait on every pid, let all jobs finish, then abort if any of them failed so |
| | |     # partial results are never merged and scored. |
| | |     n_failed=0 |
| | |     for pid in ${job_pids}; do |
| | |         wait "${pid}" || n_failed=$((n_failed+1)) |
| | |     done |
| | |     if [ "${n_failed}" -ne 0 ]; then |
| | |         echo "$0: ${n_failed} decoding job(s) failed" >&2 |
| | |         exit 1 |
| | |     fi |
| | | |
| | |     # Concatenate each result file that job 1 produced across all jobs, sorted by the first key. |
| | |     mkdir -p ${output_dir}/1best_recog |
| | |     for f in token score text; do |
| | |         if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then |
| | |             for i in $(seq "${nj}"); do |
| | |                 cat "${output_dir}/output.${i}/1best_recog/${f}" |
| | |             done | sort -k1 >"${output_dir}/1best_recog/${f}" |
| | |         fi |
| | |     done |
| | | fi |
| | | |
| | | # Stage 2: copy the merged hypothesis (text) and the reference (${data_dir}/text) side by side, |
| | | # run utils/compute_wer.py on them with text.cer as its third argument, and print the last |
| | | # three lines of text.cer. |
| | | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then |
| | | echo "Computing WER ..." |
| | | cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer |
| | | tail -n 3 ${output_dir}/1best_recog/text.cer |
| | | fi |
| | | |
| | | # Stage 3 (not run with the default stop_stage=2): normalize reference and hypothesis with |
| | | # utils/textnorm_zh.py, then score them with utils/error_rate_zh using the char tokenizer. |
| | | # It reads text.proc, which this script only writes in stage 2. |
| | | if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then |
| | | echo "SpeechIO TIOBE textnorm" |
| | | echo "$0 --> Normalizing REF text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${data_dir}/text \ |
| | | ${output_dir}/1best_recog/ref.txt |
| | | |
| | | echo "$0 --> Normalizing HYP text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${output_dir}/1best_recog/text.proc \ |
| | | ${output_dir}/1best_recog/rec.txt |
| | | # Drop lines that end in a tab (presumably utterances whose hypothesis is empty - confirm). |
| | | grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt |
| | | |
| | | echo "$0 --> computing WER/CER and alignment ..." |
| | | ./utils/error_rate_zh \ |
| | | --tokenizer char \ |
| | | --ref ${output_dir}/1best_recog/ref.txt \ |
| | | --hyp ${output_dir}/1best_recog/rec_non_empty.txt \ |
| | | ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt |
| | | # Remove the intermediate normalized hypothesis files; ref.txt, DETAILS.txt and RESULTS.txt stay. |
| | | rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt |
| | | fi |
| | | |
| | |
| | | ../TEMPLATE/README.md |
| | | ../../TEMPLATE/README.md |
| | |
| | | ../TEMPLATE/infer.py |
| | | ../../TEMPLATE/infer.py |
| New file |
| | |
| | | #!/usr/bin/env bash |
| | | |
| | | set -e |
| | | set -u |
| | | set -o pipefail |
| | | |
| | | stage=1 |
| | | stop_stage=2 |
| | | model="damo/speech_paraformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch" |
| | | data_dir="./data/test" |
| | | output_dir="./results" |
| | | batch_size=64 |
| | | gpu_inference=true # whether to perform gpu decoding |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | njob=64 # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob |
| | | checkpoint_dir= |
| | | checkpoint_name="valid.cer_ctc.ave.pb" |
| | | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # Choose the job layout. GPU decoding runs one job per id in gpuid_list; |
| | | # CPU decoding runs njob jobs with batch_size 1 and a gpuid of -1 for every job. |
| | | # The flag is compared as a string inside [ ]: the previous form expanded the |
| | | # variable and executed its value as a command, which only worked because |
| | | # "true" and "false" happen to be commands. |
| | | if [ "${gpu_inference}" = "true" ]; then |
| | |     nj=$(echo "${gpuid_list}" | awk -F "," '{print NF}') |
| | | else |
| | |     nj=${njob} |
| | |     batch_size=1 |
| | |     gpuid_list="" |
| | |     for JOB in $(seq "${nj}"); do |
| | |         gpuid_list="${gpuid_list}-1," |
| | |     done |
| | | fi |
| | | |
| | | # Build one per-job list path (wav.1.scp ... wav.$nj.scp) under $output_dir/split |
| | | # and hand them, together with the input wav.scp, to utils/split_scp.pl. |
| | | # NOTE(review): split_scp.pl presumably distributes the entries across the given outputs - confirm. |
| | | mkdir -p $output_dir/split |
| | | split_scps="" |
| | | for JOB in $(seq ${nj}); do |
| | | split_scps="$split_scps $output_dir/split/wav.$JOB.scp" |
| | | done |
| | | perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps} |
| | | |
| | | # When checkpoint_dir is non-empty, run utils/prepare_checkpoint.py with the model name, |
| | | # checkpoint_dir and checkpoint_name, then decode from ${checkpoint_dir}/${model} |
| | | # instead of the original model id. |
| | | if [ -n "${checkpoint_dir}" ]; then |
| | | python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name} |
| | | model=${checkpoint_dir}/${model} |
| | | fi |
| | | |
| | | # Stage 1: run one background infer.py job per split list, then merge the per-job outputs. |
| | | if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then |
| | |     echo "Decoding ..." |
| | |     # Turn the comma-separated id list into an array indexed by JOB-1. |
| | |     gpuid_list_array=(${gpuid_list//,/ }) |
| | |     job_pids="" |
| | |     for JOB in $(seq ${nj}); do |
| | |         { |
| | |             id=$((JOB-1)) |
| | |             gpuid=${gpuid_list_array[$id]} |
| | |             mkdir -p ${output_dir}/output.$JOB |
| | |             python infer.py \ |
| | |                 --model ${model} \ |
| | |                 --audio_in ${output_dir}/split/wav.$JOB.scp \ |
| | |                 --output_dir ${output_dir}/output.$JOB \ |
| | |                 --batch_size ${batch_size} \ |
| | |                 --gpuid ${gpuid} |
| | |         }& |
| | |         job_pids="${job_pids} $!" |
| | |     done |
| | |     # A bare "wait" always returns 0, which hid failed decoding jobs from "set -e". |
| | |     # Wait on every pid, let all jobs finish, then abort if any of them failed so |
| | |     # partial results are never merged and scored. |
| | |     n_failed=0 |
| | |     for pid in ${job_pids}; do |
| | |         wait "${pid}" || n_failed=$((n_failed+1)) |
| | |     done |
| | |     if [ "${n_failed}" -ne 0 ]; then |
| | |         echo "$0: ${n_failed} decoding job(s) failed" >&2 |
| | |         exit 1 |
| | |     fi |
| | | |
| | |     # Concatenate each result file that job 1 produced across all jobs, sorted by the first key. |
| | |     mkdir -p ${output_dir}/1best_recog |
| | |     for f in token score text; do |
| | |         if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then |
| | |             for i in $(seq "${nj}"); do |
| | |                 cat "${output_dir}/output.${i}/1best_recog/${f}" |
| | |             done | sort -k1 >"${output_dir}/1best_recog/${f}" |
| | |         fi |
| | |     done |
| | | fi |
| | | |
| | | # Stage 2: copy the merged hypothesis (text) and the reference (${data_dir}/text) side by side, |
| | | # run utils/compute_wer.py on them with text.cer as its third argument, and print the last |
| | | # three lines of text.cer. |
| | | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then |
| | | echo "Computing WER ..." |
| | | cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer |
| | | tail -n 3 ${output_dir}/1best_recog/text.cer |
| | | fi |
| | | |
| | | # Stage 3 (not run with the default stop_stage=2): normalize reference and hypothesis with |
| | | # utils/textnorm_zh.py, then score them with utils/error_rate_zh using the char tokenizer. |
| | | # It reads text.proc, which this script only writes in stage 2. |
| | | if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then |
| | | echo "SpeechIO TIOBE textnorm" |
| | | echo "$0 --> Normalizing REF text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${data_dir}/text \ |
| | | ${output_dir}/1best_recog/ref.txt |
| | | |
| | | echo "$0 --> Normalizing HYP text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${output_dir}/1best_recog/text.proc \ |
| | | ${output_dir}/1best_recog/rec.txt |
| | | # Drop lines that end in a tab (presumably utterances whose hypothesis is empty - confirm). |
| | | grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt |
| | | |
| | | echo "$0 --> computing WER/CER and alignment ..." |
| | | ./utils/error_rate_zh \ |
| | | --tokenizer char \ |
| | | --ref ${output_dir}/1best_recog/ref.txt \ |
| | | --hyp ${output_dir}/1best_recog/rec_non_empty.txt \ |
| | | ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt |
| | | # Remove the intermediate normalized hypothesis files; ref.txt, DETAILS.txt and RESULTS.txt stay. |
| | | rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt |
| | | fi |
| | | |
| | |
| | | ../TEMPLATE/README.md |
| | | ../../TEMPLATE/README.md |
| | |
| | | ../TEMPLATE/infer.py |
| | | ../../TEMPLATE/infer.py |
| New file |
| | |
| | | #!/usr/bin/env bash |
| | | |
| | | set -e |
| | | set -u |
| | | set -o pipefail |
| | | |
| | | stage=1 |
| | | stop_stage=2 |
| | | model="damo/speech_paraformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch" |
| | | data_dir="./data/test" |
| | | output_dir="./results" |
| | | batch_size=64 |
| | | gpu_inference=true # whether to perform gpu decoding |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | | njob=64 # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob |
| | | checkpoint_dir= |
| | | checkpoint_name="valid.cer_ctc.ave.pb" |
| | | |
| | | . utils/parse_options.sh || exit 1; |
| | | |
| | | # Choose the job layout. GPU decoding runs one job per id in gpuid_list; |
| | | # CPU decoding runs njob jobs with batch_size 1 and a gpuid of -1 for every job. |
| | | # The flag is compared as a string inside [ ]: the previous form expanded the |
| | | # variable and executed its value as a command, which only worked because |
| | | # "true" and "false" happen to be commands. |
| | | if [ "${gpu_inference}" = "true" ]; then |
| | |     nj=$(echo "${gpuid_list}" | awk -F "," '{print NF}') |
| | | else |
| | |     nj=${njob} |
| | |     batch_size=1 |
| | |     gpuid_list="" |
| | |     for JOB in $(seq "${nj}"); do |
| | |         gpuid_list="${gpuid_list}-1," |
| | |     done |
| | | fi |
| | | |
| | | # Build one per-job list path (wav.1.scp ... wav.$nj.scp) under $output_dir/split |
| | | # and hand them, together with the input wav.scp, to utils/split_scp.pl. |
| | | # NOTE(review): split_scp.pl presumably distributes the entries across the given outputs - confirm. |
| | | mkdir -p $output_dir/split |
| | | split_scps="" |
| | | for JOB in $(seq ${nj}); do |
| | | split_scps="$split_scps $output_dir/split/wav.$JOB.scp" |
| | | done |
| | | perl utils/split_scp.pl ${data_dir}/wav.scp ${split_scps} |
| | | |
| | | # When checkpoint_dir is non-empty, run utils/prepare_checkpoint.py with the model name, |
| | | # checkpoint_dir and checkpoint_name, then decode from ${checkpoint_dir}/${model} |
| | | # instead of the original model id. |
| | | if [ -n "${checkpoint_dir}" ]; then |
| | | python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name} |
| | | model=${checkpoint_dir}/${model} |
| | | fi |
| | | |
| | | # Stage 1: run one background infer.py job per split list, then merge the per-job outputs. |
| | | if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then |
| | |     echo "Decoding ..." |
| | |     # Turn the comma-separated id list into an array indexed by JOB-1. |
| | |     gpuid_list_array=(${gpuid_list//,/ }) |
| | |     job_pids="" |
| | |     for JOB in $(seq ${nj}); do |
| | |         { |
| | |             id=$((JOB-1)) |
| | |             gpuid=${gpuid_list_array[$id]} |
| | |             mkdir -p ${output_dir}/output.$JOB |
| | |             python infer.py \ |
| | |                 --model ${model} \ |
| | |                 --audio_in ${output_dir}/split/wav.$JOB.scp \ |
| | |                 --output_dir ${output_dir}/output.$JOB \ |
| | |                 --batch_size ${batch_size} \ |
| | |                 --gpuid ${gpuid} |
| | |         }& |
| | |         job_pids="${job_pids} $!" |
| | |     done |
| | |     # A bare "wait" always returns 0, which hid failed decoding jobs from "set -e". |
| | |     # Wait on every pid, let all jobs finish, then abort if any of them failed so |
| | |     # partial results are never merged and scored. |
| | |     n_failed=0 |
| | |     for pid in ${job_pids}; do |
| | |         wait "${pid}" || n_failed=$((n_failed+1)) |
| | |     done |
| | |     if [ "${n_failed}" -ne 0 ]; then |
| | |         echo "$0: ${n_failed} decoding job(s) failed" >&2 |
| | |         exit 1 |
| | |     fi |
| | | |
| | |     # Concatenate each result file that job 1 produced across all jobs, sorted by the first key. |
| | |     mkdir -p ${output_dir}/1best_recog |
| | |     for f in token score text; do |
| | |         if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then |
| | |             for i in $(seq "${nj}"); do |
| | |                 cat "${output_dir}/output.${i}/1best_recog/${f}" |
| | |             done | sort -k1 >"${output_dir}/1best_recog/${f}" |
| | |         fi |
| | |     done |
| | | fi |
| | | |
| | | # Stage 2: copy the merged hypothesis (text) and the reference (${data_dir}/text) side by side, |
| | | # run utils/compute_wer.py on them with text.cer as its third argument, and print the last |
| | | # three lines of text.cer. |
| | | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then |
| | | echo "Computing WER ..." |
| | | cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer |
| | | tail -n 3 ${output_dir}/1best_recog/text.cer |
| | | fi |
| | | |
| | | # Stage 3 (not run with the default stop_stage=2): normalize reference and hypothesis with |
| | | # utils/textnorm_zh.py, then score them with utils/error_rate_zh using the char tokenizer. |
| | | # It reads text.proc, which this script only writes in stage 2. |
| | | if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then |
| | | echo "SpeechIO TIOBE textnorm" |
| | | echo "$0 --> Normalizing REF text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${data_dir}/text \ |
| | | ${output_dir}/1best_recog/ref.txt |
| | | |
| | | echo "$0 --> Normalizing HYP text ..." |
| | | ./utils/textnorm_zh.py \ |
| | | --has_key --to_upper \ |
| | | ${output_dir}/1best_recog/text.proc \ |
| | | ${output_dir}/1best_recog/rec.txt |
| | | # Drop lines that end in a tab (presumably utterances whose hypothesis is empty - confirm). |
| | | grep -v $'\t$' ${output_dir}/1best_recog/rec.txt > ${output_dir}/1best_recog/rec_non_empty.txt |
| | | |
| | | echo "$0 --> computing WER/CER and alignment ..." |
| | | ./utils/error_rate_zh \ |
| | | --tokenizer char \ |
| | | --ref ${output_dir}/1best_recog/ref.txt \ |
| | | --hyp ${output_dir}/1best_recog/rec_non_empty.txt \ |
| | | ${output_dir}/1best_recog/DETAILS.txt | tee ${output_dir}/1best_recog/RESULTS.txt |
| | | # Remove the intermediate normalized hypothesis files; ref.txt, DETAILS.txt and RESULTS.txt stay. |
| | | rm -rf ${output_dir}/1best_recog/rec.txt ${output_dir}/1best_recog/rec_non_empty.txt |
| | | fi |
| | | |
| | |
| | | #### Decode with multi GPUs: |
| | | ```shell |
| | | bash infer.sh \ |
| | | --model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ |
| | | --model "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" \ |
| | | --data_dir "./data/test" \ |
| | | --output_dir "./results" \ |
| | | --batch_size 1 \ |
| | |
| | | #### Decode with multi-thread CPUs: |
| | | ```shell |
| | | bash infer.sh \ |
| | | --model "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \ |
| | | --model "damo/speech_fsmn_vad_zh-cn-16k-common-pytorch" \ |
| | | --data_dir "./data/test" \ |
| | | --output_dir "./results" \ |
| | | --gpu_inference false \ |
| | | --njob 1 |
| | | --njob 64 |
| | | ``` |
| | | |
| | | ## Finetune with pipeline |
| | |
| | | |
| | | if __name__ == "__main__": |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('--model', type=str, default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch") |
| | | parser.add_argument('--model', type=str, default="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch") |
| | | parser.add_argument('--audio_in', type=str, default="./data/test/wav.scp") |
| | | parser.add_argument('--output_dir', type=str, default="./results/") |
| | | parser.add_argument('--batch_size', type=int, default=64) |
| | | parser.add_argument('--batch_size', type=int, default=1) |
| | | parser.add_argument('--gpuid', type=str, default="0") |
| | | args = parser.parse_args() |
| | | modelscope_infer(args) |
| | |
| | | ../../TEMPLATE/README.md |
| | | ../TEMPLATE/README.md |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.voice_activity_detection, |
| | | model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", |
| | | model_revision='v1.2.0', |
| | | model_revision=None, |
| | | output_dir=output_dir, |
| | | batch_size=1, |
| | | ) |
| | |
| | | ../../TEMPLATE/infer.py |
| | | ../TEMPLATE/infer.py |
| | |
| | | ../../TEMPLATE/infer.sh |
| | | ../TEMPLATE/infer.sh |
| | |
| | | ../../TEMPLATE/README.md |
| | | ../TEMPLATE/README.md |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.voice_activity_detection, |
| | | model="damo/speech_fsmn_vad_zh-cn-8k-common", |
| | | model_revision='v1.2.0', |
| | | model_revision=None, |
| | | output_dir=output_dir, |
| | | batch_size=1, |
| | | ) |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.voice_activity_detection, |
| | | model="damo/speech_fsmn_vad_zh-cn-8k-common", |
| | | model_revision='v1.2.0', |
| | | model_revision=None, |
| | | output_dir=output_dir, |
| | | batch_size=1, |
| | | mode='online', |
| | |
| | | ../../TEMPLATE/infer.py |
| | | ../TEMPLATE/infer.py |
| | |
| | | ../../TEMPLATE/infer.sh |
| | | ../TEMPLATE/infer.sh |
| | |
| | | stream->Write(res); |
| | | } |
| | | else { |
| | | FUNASR_RESULT Result= FunOfflineRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, 16000, RASR_NONE, NULL); |
| | | FUNASR_RESULT Result= FunOfflineInferBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL, 16000); |
| | | std::string asr_result = ((FUNASR_RECOG_RESULT*)Result)->msg; |
| | | |
| | | auto end_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count(); |
| | |
| | | FUNASR_MODEL_PARAFORMER = 3, |
| | | }FUNASR_MODEL_TYPE; |
| | | |
| | | typedef enum |
| | | { |
| | | FSMN_VAD_OFFLINE=0, |
| | | FSMN_VAD_ONLINE = 1, |
| | | }FSMN_VAD_MODE; |
| | | |
| | | typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step. |
| | | |
| | | // ASR |
| | | _FUNASRAPI FUNASR_HANDLE FunASRInit(std::map<std::string, std::string>& model_path, int thread_num); |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | // buffer |
| | | _FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | // file, support wav & pcm |
| | | _FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | |
| | | _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index); |
| | | _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result); |
| | |
| | | _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result); |
| | | |
| | | // VAD |
| | | _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num); |
| | | _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num, FSMN_VAD_MODE mode=FSMN_VAD_OFFLINE); |
| | | // buffer |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | // file, support wav & pcm |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | _FUNASRAPI std::vector<std::vector<int>>* FsmnVadGetResult(FUNASR_RESULT result,int n_index); |
| | | _FUNASRAPI void FsmnVadFreeResult(FUNASR_RESULT result); |
| | | _FUNASRAPI void FsmnVadUninit(FUNASR_HANDLE handle); |
| | |
| | | |
| | | //OfflineStream |
| | | _FUNASRAPI FUNASR_HANDLE FunOfflineInit(std::map<std::string, std::string>& model_path, int thread_num); |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback); |
| | | // buffer |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | // file, support wav & pcm |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000); |
| | | _FUNASRAPI void FunOfflineUninit(FUNASR_HANDLE handle); |
| | | |
| | | #ifdef __cplusplus |
| | |
| | | virtual void LoadConfigFromYaml(const char* filename)=0; |
| | | virtual void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats, |
| | | const std::vector<float> &waves)=0; |
| | | virtual std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats)=0; |
| | | virtual void LfrCmvn(std::vector<std::vector<float>> &vad_feats)=0; |
| | | virtual void Forward( |
| | | const std::vector<std::vector<float>> &chunk_feats, |
| | | std::vector<std::vector<float>> *out_prob)=0; |
| | |
| | | virtual void InitCache()=0; |
| | | }; |
| | | |
| | | VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num); |
| | | VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num, int mode); |
| | | } // namespace funasr |
| | | #endif |
| | |
| | | |
| | | ### funasr-onnx-offline |
| | | ```shell |
| | | ./funasr-onnx-offline [--wav-scp <string>] [--wav-path <string>] |
| | | [--punc-quant <string>] [--punc-dir <string>] |
| | | [--vad-quant <string>] [--vad-dir <string>] |
| | | [--quantize <string>] --model-dir <string> |
| | | [--] [--version] [-h] |
| | | ./funasr-onnx-offline --model-dir <string> [--quantize <string>] |
| | | [--vad-dir <string>] [--vad-quant <string>] |
| | | [--punc-dir <string>] [--punc-quant <string>] |
| | | --wav-path <string> [--] [--version] [-h] |
| | | Where: |
| | | --model-dir <string> |
| | | (required) the asr model path, which contains model.onnx, config.yaml, am.mvn |
| | |
| | | --punc-quant <string> |
| | | false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir |
| | | |
| | | --wav-scp <string> |
| | | wave scp path |
| | | --wav-path <string> |
| | | wave file path |
| | | (required) the input could be: |
| | | wav_path, e.g.: asr_example.wav; |
| | | pcm_path, e.g.: asr_example.pcm; |
| | | wav.scp, kaldi style wav list (wav_id \t wav_path) |
| | | |
| | | Required: --model-dir <string> |
| | | Required: --model-dir <string> --wav-path <string> |
| | | If use vad, please add: --vad-dir <string> |
| | | If use punc, please add: --punc-dir <string> |
| | | |
| | |
| | | |
| | | ### funasr-onnx-offline-vad |
| | | ```shell |
| | | ./funasr-onnx-offline-vad [--wav-scp <string>] [--wav-path <string>] |
| | | [--quantize <string>] --model-dir <string> |
| | | [--] [--version] [-h] |
| | | ./funasr-onnx-offline-vad --model-dir <string> [--quantize <string>] |
| | | --wav-path <string> [--] [--version] [-h] |
| | | Where: |
| | | --model-dir <string> |
| | | (required) the vad model path, which contains model.onnx, vad.yaml, vad.mvn |
| | | --quantize <string> |
| | | false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir |
| | | --wav-scp <string> |
| | | wave scp path |
| | | --wav-path <string> |
| | | wave file path |
| | | (required) the input could be: |
| | | wav_path, e.g.: asr_example.wav; |
| | | pcm_path, e.g.: asr_example.pcm; |
| | | wav.scp, kaldi style wav list (wav_id \t wav_path) |
| | | |
| | | Required: --model-dir <string> |
| | | Required: --model-dir <string> --wav-path <string> |
| | | |
| | | For example: |
| | | ./funasr-onnx-offline-vad \ |
| | |
| | | |
| | | ### funasr-onnx-offline-punc |
| | | ```shell |
| | | ./funasr-onnx-offline-punc [--txt-path <string>] [--quantize <string>] |
| | | --model-dir <string> [--] [--version] [-h] |
| | | ./funasr-onnx-offline-punc --model-dir <string> [--quantize <string>] |
| | | --txt-path <string> [--] [--version] [-h] |
| | | Where: |
| | | --model-dir <string> |
| | | (required) the punc model path, which contains model.onnx, punc.yaml |
| | | --quantize <string> |
| | | false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir |
| | | --txt-path <string> |
| | | txt file path, one sentence per line |
| | | (required) txt file path, one sentence per line |
| | | |
| | | Required: --model-dir <string> |
| | | Required: --model-dir <string> --txt-path <string> |
| | | |
| | | For example: |
| | | ./funasr-onnx-offline-punc \ |
| | |
| | | ``` |
| | | ### funasr-onnx-offline-rtf |
| | | ```shell |
| | | ./funasr-onnx-offline-rtf --thread-num <int32_t> --wav-scp <string> |
| | | [--quantize <string>] --model-dir <string> |
| | | ./funasr-onnx-offline-rtf --model-dir <string> [--quantize <string>] |
| | | --wav-path <string> --thread-num <int32_t> |
| | | [--] [--version] [-h] |
| | | Where: |
| | | --thread-num <int32_t> |
| | |
| | | (required) the model path, which contains model.onnx, config.yaml, am.mvn |
| | | --quantize <string> |
| | | false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir |
| | | --wav-scp <string> |
| | | (required) wave scp path |
| | | --wav-path <string> |
| | | (required) the input could be: |
| | | wav_path, e.g.: asr_example.wav; |
| | | pcm_path, e.g.: asr_example.pcm; |
| | | wav.scp, kaldi style wav list (wav_id \t wav_path) |
| | | |
| | | For example: |
| | | ./funasr-onnx-offline-rtf \ |
| | | --model-dir ./asrmodel/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch \ |
| | | --quantize true \ |
| | | --wav-scp ./aishell1_test.scp \ |
| | | --wav-path ./aishell1_test.scp \ |
| | | --thread-num 32 |
| | | ``` |
| | | |
| | |
| | | } |
| | | } |
| | | |
| | | std::vector<std::vector<float>> &FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) { |
| | | void FsmnVad::LfrCmvn(std::vector<std::vector<float>> &vad_feats) { |
| | | |
| | | std::vector<std::vector<float>> out_feats; |
| | | int T = vad_feats.size(); |
| | |
| | | } |
| | | } |
| | | vad_feats = out_feats; |
| | | return vad_feats; |
| | | } |
| | | |
| | | std::vector<std::vector<int>> |
| | |
| | | std::vector<std::vector<float>> vad_feats; |
| | | std::vector<std::vector<float>> vad_probs; |
| | | FbankKaldi(vad_sample_rate_, vad_feats, waves); |
| | | vad_feats = LfrCmvn(vad_feats); |
| | | LfrCmvn(vad_feats); |
| | | Forward(vad_feats, &vad_probs); |
| | | |
| | | E2EVadModel vad_scorer = E2EVadModel(); |
| | |
| | | void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats, |
| | | const std::vector<float> &waves); |
| | | |
| | | std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats); |
| | | void LfrCmvn(std::vector<std::vector<float>> &vad_feats); |
| | | |
| | | void Forward( |
| | | const std::vector<std::vector<float>> &chunk_feats, |
| | |
| | | TCLAP::CmdLine cmd("funasr-onnx-offline-punc", ' ', "1.0"); |
| | | TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the punc model path, which contains model.onnx, punc.yaml", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string"); |
| | | TCLAP::ValueArg<std::string> txt_path("", TXT_PATH, "txt file path, one sentence per line", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> txt_path("", TXT_PATH, "txt file path, one sentence per line", true, "", "string"); |
| | | |
| | | cmd.add(model_dir); |
| | | cmd.add(quantize); |
| | |
| | | // warm up |
| | | for (size_t i = 0; i < 1; i++) |
| | | { |
| | | FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, 16000); |
| | | } |
| | | |
| | | while (true) { |
| | |
| | | } |
| | | |
| | | gettimeofday(&start, NULL); |
| | | FUNASR_RESULT result=FunASRRecogFile(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FunASRInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, 16000); |
| | | |
| | | gettimeofday(&end, NULL); |
| | | seconds = (end.tv_sec - start.tv_sec); |
| | |
| | | } |
| | | } |
| | | |
| | | // Returns true when the text after the last '.' in `filename` equals `target`. |
| | | // A filename that contains no '.' never matches. |
| | | bool is_target_file(const std::string& filename, const std::string target) { |
| | |     const std::size_t dot = filename.rfind('.'); |
| | |     if (dot != std::string::npos) { |
| | |         // Compare everything following the dot against the wanted extension. |
| | |         return filename.compare(dot + 1, std::string::npos, target) == 0; |
| | |     } |
| | |     return false; |
| | | } |
| | | |
| | | void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path) |
| | | { |
| | | if (value_arg.isSet()){ |
| | |
| | | TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string"); |
| | | |
| | | TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string"); |
| | | TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t"); |
| | | |
| | | cmd.add(model_dir); |
| | | cmd.add(quantize); |
| | | cmd.add(wav_scp); |
| | | cmd.add(wav_path); |
| | | cmd.add(thread_num); |
| | | cmd.parse(argc, argv); |
| | | |
| | | std::map<std::string, std::string> model_path; |
| | | GetValue(model_dir, MODEL_DIR, model_path); |
| | | GetValue(quantize, QUANTIZE, model_path); |
| | | GetValue(wav_scp, WAV_SCP, model_path); |
| | | GetValue(wav_path, WAV_PATH, model_path); |
| | | |
| | | struct timeval start, end; |
| | | gettimeofday(&start, NULL); |
| | |
| | | |
| | | // read wav_scp |
| | | vector<string> wav_list; |
| | | if(model_path.find(WAV_SCP)!=model_path.end()){ |
| | | ifstream in(model_path.at(WAV_SCP)); |
| | | string wav_path_ = model_path.at(WAV_PATH); |
| | | if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){ |
| | | wav_list.emplace_back(wav_path_); |
| | | } |
| | | else if(is_target_file(wav_path_, "scp")){ |
| | | ifstream in(wav_path_); |
| | | if (!in.is_open()) { |
| | | LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP); |
| | | LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ; |
| | | return 0; |
| | | } |
| | | string line; |
| | |
| | | wav_list.emplace_back(column2); |
| | | } |
| | | in.close(); |
| | | }else{ |
| | | LOG(ERROR)<<"Please check the wav extension!"; |
| | | exit(-1); |
| | | } |
| | | |
| | | // 多线程测试 |
| | |
| | | |
| | | using namespace std; |
| | | |
| | | // Returns true when the text after the last '.' in `filename` equals `target`. |
| | | // A filename that contains no '.' never matches. |
| | | bool is_target_file(const std::string& filename, const std::string target) { |
| | |     const std::size_t dot = filename.rfind('.'); |
| | |     if (dot != std::string::npos) { |
| | |         // Compare everything following the dot against the wanted extension. |
| | |         return filename.compare(dot + 1, std::string::npos, target) == 0; |
| | |     } |
| | |     return false; |
| | | } |
| | | |
| | | void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path) |
| | | { |
| | | if (value_arg.isSet()){ |
| | |
| | | TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string"); |
| | | |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "wave file path", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string"); |
| | | |
| | | cmd.add(model_dir); |
| | | cmd.add(quantize); |
| | | cmd.add(wav_path); |
| | | cmd.add(wav_scp); |
| | | cmd.parse(argc, argv); |
| | | |
| | | std::map<std::string, std::string> model_path; |
| | | GetValue(model_dir, MODEL_DIR, model_path); |
| | | GetValue(quantize, QUANTIZE, model_path); |
| | | GetValue(wav_path, WAV_PATH, model_path); |
| | | GetValue(wav_scp, WAV_SCP, model_path); |
| | | |
| | | struct timeval start, end; |
| | | gettimeofday(&start, NULL); |
| | |
| | | long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | | LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s"; |
| | | |
| | | // read wav_path and wav_scp |
| | | // read wav_path |
| | | vector<string> wav_list; |
| | | |
| | | if(model_path.find(WAV_PATH)!=model_path.end()){ |
| | | wav_list.emplace_back(model_path.at(WAV_PATH)); |
| | | string wav_path_ = model_path.at(WAV_PATH); |
| | | if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){ |
| | | wav_list.emplace_back(wav_path_); |
| | | } |
| | | if(model_path.find(WAV_SCP)!=model_path.end()){ |
| | | ifstream in(model_path.at(WAV_SCP)); |
| | | else if(is_target_file(wav_path_, "scp")){ |
| | | ifstream in(wav_path_); |
| | | if (!in.is_open()) { |
| | | LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ; |
| | | return 0; |
| | |
| | | wav_list.emplace_back(column2); |
| | | } |
| | | in.close(); |
| | | }else{ |
| | | LOG(ERROR)<<"Please check the wav extension!"; |
| | | exit(-1); |
| | | } |
| | | |
| | | float snippet_time = 0.0f; |
| | | long taking_micros = 0; |
| | | for(auto& wav_file : wav_list){ |
| | | gettimeofday(&start, NULL); |
| | | FUNASR_RESULT result=FsmnVadWavFile(vad_hanlde, wav_file.c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), FSMN_VAD_OFFLINE, NULL, 16000); |
| | | gettimeofday(&end, NULL); |
| | | seconds = (end.tv_sec - start.tv_sec); |
| | | taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | |
| | | |
| | | using namespace std; |
| | | |
| | | bool is_target_file(const std::string& filename, const std::string target) { |
| | | std::size_t pos = filename.find_last_of("."); |
| | | if (pos == std::string::npos) { |
| | | return false; |
| | | } |
| | | std::string extension = filename.substr(pos + 1); |
| | | return (extension == target); |
| | | } |
| | | |
| | | void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path) |
| | | { |
| | | if (value_arg.isSet()){ |
| | |
| | | TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc model path, which contains model.onnx, punc.yaml", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "false", "string"); |
| | | |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "wave file path", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> wav_scp("", WAV_SCP, "wave scp path", false, "", "string"); |
| | | TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string"); |
| | | |
| | | cmd.add(model_dir); |
| | | cmd.add(quantize); |
| | |
| | | cmd.add(punc_dir); |
| | | cmd.add(punc_quant); |
| | | cmd.add(wav_path); |
| | | cmd.add(wav_scp); |
| | | cmd.parse(argc, argv); |
| | | |
| | | std::map<std::string, std::string> model_path; |
| | |
| | | GetValue(punc_dir, PUNC_DIR, model_path); |
| | | GetValue(punc_quant, PUNC_QUANT, model_path); |
| | | GetValue(wav_path, WAV_PATH, model_path); |
| | | GetValue(wav_scp, WAV_SCP, model_path); |
| | | |
| | | struct timeval start, end; |
| | | gettimeofday(&start, NULL); |
| | |
| | | long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | | LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s"; |
| | | |
| | | // read wav_path and wav_scp |
| | | // read wav_path |
| | | vector<string> wav_list; |
| | | |
| | | if(model_path.find(WAV_PATH)!=model_path.end()){ |
| | | wav_list.emplace_back(model_path.at(WAV_PATH)); |
| | | string wav_path_ = model_path.at(WAV_PATH); |
| | | if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){ |
| | | wav_list.emplace_back(wav_path_); |
| | | } |
| | | if(model_path.find(WAV_SCP)!=model_path.end()){ |
| | | ifstream in(model_path.at(WAV_SCP)); |
| | | else if(is_target_file(wav_path_, "scp")){ |
| | | ifstream in(wav_path_); |
| | | if (!in.is_open()) { |
| | | LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ; |
| | | return 0; |
| | |
| | | wav_list.emplace_back(column2); |
| | | } |
| | | in.close(); |
| | | }else{ |
| | | LOG(ERROR)<<"Please check the wav extension!"; |
| | | exit(-1); |
| | | } |
| | | |
| | | float snippet_time = 0.0f; |
| | | long taking_micros = 0; |
| | | for(auto& wav_file : wav_list){ |
| | | gettimeofday(&start, NULL); |
| | | FUNASR_RESULT result=FunOfflineRecogFile(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL); |
| | | FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, 16000); |
| | | gettimeofday(&end, NULL); |
| | | seconds = (end.tv_sec - start.tv_sec); |
| | | taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); |
| | |
| | | return mm; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num) |
| | | _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num, FSMN_VAD_MODE mode) |
| | | { |
| | | funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num); |
| | | funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num, mode); |
| | | return mm; |
| | | } |
| | | |
| | |
| | | } |
| | | |
| | | // APIs for ASR Infer |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | { |
| | | funasr::Model* recog_obj = (funasr::Model*)handle; |
| | | if (!recog_obj) |
| | | return nullptr; |
| | | |
| | | int32_t sampling_rate = -1; |
| | | funasr::Audio audio(1); |
| | | if (!audio.LoadWav(sz_buf, n_len, &sampling_rate)) |
| | | return nullptr; |
| | | |
| | | float* buff; |
| | | int len; |
| | | int flag=0; |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | int n_step = 0; |
| | | int n_total = audio.GetQueueSize(); |
| | | while (audio.Fetch(buff, len, flag) > 0) { |
| | | string msg = recog_obj->Forward(buff, len, flag); |
| | | p_result->msg += msg; |
| | | n_step++; |
| | | if (fn_callback) |
| | | fn_callback(n_step, n_total); |
| | | } |
| | | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | _FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::Model* recog_obj = (funasr::Model*)handle; |
| | | if (!recog_obj) |
| | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | _FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::Model* recog_obj = (funasr::Model*)handle; |
| | | if (!recog_obj) |
| | | return nullptr; |
| | | |
| | | funasr::Audio audio(1); |
| | | if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) |
| | | return nullptr; |
| | | if(funasr::is_target_file(sz_filename, "wav")){ |
| | | int32_t sampling_rate_ = -1; |
| | | if(!audio.LoadWav(sz_filename, &sampling_rate_)) |
| | | return nullptr; |
| | | }else if(funasr::is_target_file(sz_filename, "pcm")){ |
| | | if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) |
| | | return nullptr; |
| | | }else{ |
| | | LOG(ERROR)<<"Wrong wav extension"; |
| | | exit(-1); |
| | | } |
| | | |
| | | float* buff; |
| | | int len; |
| | | int flag = 0; |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | int n_step = 0; |
| | | int n_total = audio.GetQueueSize(); |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | while (audio.Fetch(buff, len, flag) > 0) { |
| | | string msg = recog_obj->Forward(buff, len, flag); |
| | | p_result->msg += msg; |
| | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | { |
| | | funasr::Model* recog_obj = (funasr::Model*)handle; |
| | | if (!recog_obj) |
| | | return nullptr; |
| | | |
| | | int32_t sampling_rate = -1; |
| | | funasr::Audio audio(1); |
| | | if(!audio.LoadWav(sz_wavfile, &sampling_rate)) |
| | | return nullptr; |
| | | |
| | | float* buff; |
| | | int len; |
| | | int flag = 0; |
| | | int n_step = 0; |
| | | int n_total = audio.GetQueueSize(); |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | while (audio.Fetch(buff, len, flag) > 0) { |
| | | string msg = recog_obj->Forward(buff, len, flag); |
| | | p_result->msg+= msg; |
| | | n_step++; |
| | | if (fn_callback) |
| | | fn_callback(n_step, n_total); |
| | | } |
| | | |
| | | return p_result; |
| | | } |
| | | |
| | | // APIs for VAD Infer |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::VadModel* vad_obj = (funasr::VadModel*)handle; |
| | | if (!vad_obj) |
| | | return nullptr; |
| | | |
| | | int32_t sampling_rate = -1; |
| | | |
| | | funasr::Audio audio(1); |
| | | if(!audio.LoadWav(sz_wavfile, &sampling_rate)) |
| | | if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate)) |
| | | return nullptr; |
| | | |
| | | funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | |
| | | vector<std::vector<int>> vad_segments; |
| | | audio.Split(vad_obj, vad_segments); |
| | | p_result->segments = new vector<std::vector<int>>(vad_segments); |
| | | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, FSMN_VAD_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::VadModel* vad_obj = (funasr::VadModel*)handle; |
| | | if (!vad_obj) |
| | | return nullptr; |
| | | |
| | | funasr::Audio audio(1); |
| | | if(funasr::is_target_file(sz_filename, "wav")){ |
| | | int32_t sampling_rate_ = -1; |
| | | if(!audio.LoadWav(sz_filename, &sampling_rate_)) |
| | | return nullptr; |
| | | }else if(funasr::is_target_file(sz_filename, "pcm")){ |
| | | if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) |
| | | return nullptr; |
| | | }else{ |
| | | LOG(ERROR)<<"Wrong wav extension"; |
| | | exit(-1); |
| | | } |
| | | |
| | | funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | |
| | | } |
| | | |
| | | // APIs for Offline-stream Infer |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | { |
| | | funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; |
| | | if (!offline_stream) |
| | | return nullptr; |
| | | |
| | | int32_t sampling_rate = -1; |
| | | funasr::Audio audio(1); |
| | | if(!audio.LoadWav(sz_wavfile, &sampling_rate)) |
| | | return nullptr; |
| | | if(offline_stream->UseVad()){ |
| | | audio.Split(offline_stream); |
| | | } |
| | | |
| | | float* buff; |
| | | int len; |
| | | int flag = 0; |
| | | int n_step = 0; |
| | | int n_total = audio.GetQueueSize(); |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | while (audio.Fetch(buff, len, flag) > 0) { |
| | | string msg = (offline_stream->asr_handle)->Forward(buff, len, flag); |
| | | p_result->msg+= msg; |
| | | n_step++; |
| | | if (fn_callback) |
| | | fn_callback(n_step, n_total); |
| | | } |
| | | if(offline_stream->UsePunc()){ |
| | | string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str()); |
| | | p_result->msg = punc_res; |
| | | } |
| | | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback) |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; |
| | | if (!offline_stream) |
| | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate) |
| | | { |
| | | funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle; |
| | | if (!offline_stream) |
| | | return nullptr; |
| | | |
| | | funasr::Audio audio(1); |
| | | if(funasr::is_target_file(sz_filename, "wav")){ |
| | | int32_t sampling_rate_ = -1; |
| | | if(!audio.LoadWav(sz_filename, &sampling_rate_)) |
| | | return nullptr; |
| | | }else if(funasr::is_target_file(sz_filename, "pcm")){ |
| | | if (!audio.LoadPcmwav(sz_filename, &sampling_rate)) |
| | | return nullptr; |
| | | }else{ |
| | | LOG(ERROR)<<"Wrong wav extension"; |
| | | exit(-1); |
| | | } |
| | | if(offline_stream->UseVad()){ |
| | | audio.Split(offline_stream); |
| | | } |
| | | |
| | | float* buff; |
| | | int len; |
| | | int flag = 0; |
| | | int n_step = 0; |
| | | int n_total = audio.GetQueueSize(); |
| | | funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT; |
| | | p_result->snippet_time = audio.GetTimeLen(); |
| | | while (audio.Fetch(buff, len, flag) > 0) { |
| | | string msg = (offline_stream->asr_handle)->Forward(buff, len, flag); |
| | | p_result->msg+= msg; |
| | | n_step++; |
| | | if (fn_callback) |
| | | fn_callback(n_step, n_total); |
| | | } |
| | | if(offline_stream->UsePunc()){ |
| | | string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str()); |
| | | p_result->msg = punc_res; |
| | | } |
| | | |
| | | return p_result; |
| | | } |
| | | |
| | | _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result) |
| | | { |
| | | if (!result) |
| | |
| | | } |
| | | } |
| | | |
| | | bool is_target_file(const std::string& filename, const std::string target) { |
| | | std::size_t pos = filename.find_last_of("."); |
| | | if (pos == std::string::npos) { |
| | | return false; |
| | | } |
| | | std::string extension = filename.substr(pos + 1); |
| | | return (extension == target); |
| | | } |
| | | |
| | | } // namespace funasr |
| | |
| | | extern void Glu(Tensor<float> *din, Tensor<float> *dout); |
| | | |
| | | string PathAppend(const string &p1, const string &p2); |
| | | bool is_target_file(const std::string& filename, const std::string target); |
| | | |
| | | } // namespace funasr |
| | | #endif |
| | |
| | | #include "precomp.h" |
| | | |
| | | namespace funasr { |
| | | VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num) |
| | | VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num, int mode) |
| | | { |
| | | VadModel *mm; |
| | | mm = new FsmnVad(); |
| | | if(mode == FSMN_VAD_OFFLINE){ |
| | | mm = new FsmnVad(); |
| | | }else{ |
| | | LOG(ERROR)<<"Online fsmn vad not imp!"; |
| | | } |
| | | |
| | | string vad_model_path; |
| | | string vad_cmvn_path; |
| | |
| | | if (!buffer.empty()) {
|
| | | // fout.write(buffer.data(), buffer.size());
|
| | | // feed data to asr engine
|
| | | FUNASR_RESULT Result = FunOfflineRecogPCMBuffer(
|
| | | asr_hanlde, buffer.data(), buffer.size(), 16000, RASR_NONE, NULL);
|
| | | FUNASR_RESULT Result = FunOfflineInferBuffer(
|
| | | asr_hanlde, buffer.data(), buffer.size(), RASR_NONE, NULL, 16000);
|
| | |
|
| | | std::string asr_result =
|
| | | ((FUNASR_RECOG_RESULT*)Result)->msg; // get decode result
|
| | |
| | | |
| | | |
| | | def time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed): |
| | | punc_list = [',', '。', '?', '、'] |
| | | res = [] |
| | | if text_postprocessed is None: |
| | | return res |
| | |
| | | punc_id = int(punc_id) if punc_id is not None else 1 |
| | | sentence_end = time_stamp[1] if time_stamp is not None else sentence_end |
| | | |
| | | if punc_id == 2: |
| | | sentence_text += ',' |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |
| | | "end": sentence_end, |
| | | "text_seg": sentence_text_seg, |
| | | "ts_list": ts_list |
| | | }) |
| | | sentence_text = '' |
| | | sentence_text_seg = '' |
| | | ts_list = [] |
| | | sentence_start = sentence_end |
| | | elif punc_id == 3: |
| | | sentence_text += '.' |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |
| | | "end": sentence_end, |
| | | "text_seg": sentence_text_seg, |
| | | "ts_list": ts_list |
| | | }) |
| | | sentence_text = '' |
| | | sentence_text_seg = '' |
| | | ts_list = [] |
| | | sentence_start = sentence_end |
| | | elif punc_id == 4: |
| | | sentence_text += '?' |
| | | if punc_id > 1: |
| | | sentence_text += punc_list[punc_id - 2] |
| | | res.append({ |
| | | 'text': sentence_text, |
| | | "start": sentence_start, |