From 1a6d9d5cc422dcd1e6dd5b9c67047d63bc6cd667 Mon Sep 17 00:00:00 2001
From: 语帆 <yf352572@alibaba-inc.com>
Date: Mon, 04 Mar 2024 16:32:28 +0800
Subject: [PATCH] atsr

---
 /dev/null                                           |   71 -----------------------------------
 .gitignore                                          |    1 
 funasr/utils/load_utils.py                          |    2 -
 examples/industrial_data_pretraining/lcbnet/demo.py |    2 
 examples/industrial_data_pretraining/lcbnet/demo.sh |   10 ++--
 5 files changed, 7 insertions(+), 79 deletions(-)

diff --git a/.gitignore b/.gitignore
index d2b4c53..1f2a3d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,4 +26,5 @@
 GPT-SoVITS*
 examples/*/*/outputs
 examples/*/*/exp
+examples/*/*/tmp
 cmd_read
diff --git a/examples/industrial_data_pretraining/lcbnet/demo.py b/examples/industrial_data_pretraining/lcbnet/demo.py
index d0870bc..602a986 100755
--- a/examples/industrial_data_pretraining/lcbnet/demo.py
+++ b/examples/industrial_data_pretraining/lcbnet/demo.py
@@ -10,7 +10,7 @@
 
 
 # example1
-res = model.generate(input='["~/.cache/modelscope/hub/iic/LCB-NET/example/asr_example.wav","~/.cache/modelscope/hub/iic/LCB-NET/example/ocr.txt"]',data_type='["sound", "text"]')
+res = model.generate(input=("https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/asr_example.wav","https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/ocr.txt"),data_type=("sound", "text"))
 
 print(res)
 
diff --git a/examples/industrial_data_pretraining/lcbnet/demo.sh b/examples/industrial_data_pretraining/lcbnet/demo.sh
index 8252891..3e04ccd 100755
--- a/examples/industrial_data_pretraining/lcbnet/demo.sh
+++ b/examples/industrial_data_pretraining/lcbnet/demo.sh
@@ -1,5 +1,5 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+file_dir="/home/yf352572/.cache/modelscope/hub/iic/LCB-NET/"
+CUDA_VISIBLE_DEVICES="0,1"
 inference_device="cuda"
 
 if [ ${inference_device} == "cuda" ]; then
@@ -12,7 +12,7 @@
     done
 fi
 
-inference_dir="outputs/slidespeech_dev_beamsearch_new"
+inference_dir="outputs/slidespeech_dev"
 _logdir="${inference_dir}/logdir"
 echo "inference_dir: ${inference_dir}"
 
@@ -39,11 +39,11 @@
         python -m funasr.bin.inference \
         --config-path=${file_dir} \
         --config-name="config.yaml" \
-        ++init_param=${file_dir}/model.pb \
+        ++init_param=${file_dir}/model.pt \
         ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
         ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
         +data_type='["kaldi_ark", "text"]' \
-        ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
+        ++tokenizer_conf.bpemodel=${file_dir}/bpe.pt \
         ++output_dir="${inference_dir}/${JOB}" \
         ++device="${inference_device}" \
         ++ncpu=1 \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo2.sh b/examples/industrial_data_pretraining/lcbnet/demo2.sh
deleted file mode 100755
index 69df6d1..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo2.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-test_set="dev_wav"
-if [ ${inference_device} == "cuda" ]; then
-    nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
-    inference_batch_size=1
-    CUDA_VISIBLE_DEVICES=""
-    for JOB in $(seq ${nj}); do
-        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
-    done
-fi
-
-inference_dir="outputs/slidespeech_dev_beamsearch_wav"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/${test_set}/wav.scp
-key_file2=${file_dir}/${test_set}/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
-    split_scps1+=" ${_logdir}/wav.${JOB}.scp"
-    split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
-    {
-        id=$((JOB-1))
-        gpuid=${gpuid_list_array[$id]}
-
-        export CUDA_VISIBLE_DEVICES=${gpuid}
-
-        python -m funasr.bin.inference \
-        --config-path=${file_dir} \
-        --config-name="config.yaml" \
-        ++init_param=${file_dir}/model.pb \
-        ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-        ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
-        +data_type='["sound", "text"]' \
-        ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-        ++output_dir="${inference_dir}/${JOB}" \
-        ++device="${inference_device}" \
-        ++ncpu=1 \
-        ++disable_log=true  &> ${_logdir}/log.${JOB}.txt
-
-    }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
-   cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done  
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/'  ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp  ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref
-cp  ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh  ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh b/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh
deleted file mode 100755
index da6ad68..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-test_set="test_wav"
-if [ ${inference_device} == "cuda" ]; then
-    nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
-    inference_batch_size=1
-    CUDA_VISIBLE_DEVICES=""
-    for JOB in $(seq ${nj}); do
-        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
-    done
-fi
-
-inference_dir="outputs/slidespeech_test_beamsearch_wav"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/${test_set}/wav.scp
-key_file2=${file_dir}/${test_set}/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
-    split_scps1+=" ${_logdir}/wav.${JOB}.scp"
-    split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
-    {
-        id=$((JOB-1))
-        gpuid=${gpuid_list_array[$id]}
-
-        export CUDA_VISIBLE_DEVICES=${gpuid}
-
-        python -m funasr.bin.inference \
-        --config-path=${file_dir} \
-        --config-name="config.yaml" \
-        ++init_param=${file_dir}/model.pb \
-        ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-        ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
-        +data_type='["sound", "text"]' \
-        ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-        ++output_dir="${inference_dir}/${JOB}" \
-        ++device="${inference_device}" \
-        ++ncpu=1 \
-        ++disable_log=true  &> ${_logdir}/log.${JOB}.txt
-
-    }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
-   cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done  
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/'  ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp  ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref
-cp  ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh  ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh
deleted file mode 100755
index 0747a8d..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-
-#CUDA_VISIBLE_DEVICES="" \
-python -m funasr.bin.inference \
---config-path=${file_dir} \
---config-name="config.yaml" \
-++init_param=${file_dir}/model.pb \
-++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-+input=["${file_dir}/example/asr_example.wav","${file_dir}/example/ocr.txt"] \
-+data_type='["sound","text"]' \
-++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-++output_dir="./outputs/debug" \
-++device="cpu" \
-
-#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav"] \
-#+data_type='["sound"]' \
-#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav","/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch/example/ocr2.txt"]  \
-#+data_type='["sound","text"]' \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh
deleted file mode 100755
index 557e9b2..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-
-#CUDA_VISIBLE_DEVICES="" \
-python -m funasr.bin.inference \
---config-path=${file_dir} \
---config-name="config.yaml" \
-++init_param=${file_dir}/model.pb \
-++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \
-+data_type='["sound", "text"]' \
-++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-++output_dir="./outputs/debug" \
-++device="cpu" \
-
-#++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh b/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh
deleted file mode 100755
index 488f7d2..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-
-if [ ${inference_device} == "cuda" ]; then
-    nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
-    inference_batch_size=1
-    CUDA_VISIBLE_DEVICES=""
-    for JOB in $(seq ${nj}); do
-        CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
-    done
-fi
-
-inference_dir="outputs/slidespeech_test_beamsearch_new"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/test/wav.scp
-key_file2=${file_dir}/test/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
-    split_scps1+=" ${_logdir}/wav.${JOB}.scp"
-    split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
-    {
-        id=$((JOB-1))
-        gpuid=${gpuid_list_array[$id]}
-
-        export CUDA_VISIBLE_DEVICES=${gpuid}
-
-        python -m funasr.bin.inference \
-        --config-path=${file_dir} \
-        --config-name="config.yaml" \
-        ++init_param=${file_dir}/model.pb \
-        ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-        ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
-        +data_type='["kaldi_ark", "text"]' \
-        ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-        ++output_dir="${inference_dir}/${JOB}" \
-        ++device="${inference_device}" \
-        ++ncpu=1 \
-        ++disable_log=true  &> ${_logdir}/log.${JOB}.txt
-
-    }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
-   cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done  
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/▁/ /g' -e 's/\t /\t/'  ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp  ${file_dir}/test/text ${inference_dir}/1best_recog/token.ref
-cp  ${file_dir}/test/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh  ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/funasr/utils/load_utils.py b/funasr/utils/load_utils.py
index 644af23..84c38f9 100644
--- a/funasr/utils/load_utils.py
+++ b/funasr/utils/load_utils.py
@@ -89,8 +89,6 @@
     return array
 
 def extract_fbank(data, data_len = None, data_type: str="sound", frontend=None, **kwargs):
-    # import pdb;
-    # pdb.set_trace()
     if isinstance(data, np.ndarray):
         data = torch.from_numpy(data)
         if len(data.shape) < 2:

--
Gitblit v1.9.1