From 1a6d9d5cc422dcd1e6dd5b9c67047d63bc6cd667 Mon Sep 17 00:00:00 2001
From: 语帆 <yf352572@alibaba-inc.com>
Date: Mon, 04 Mar 2024 16:32:28 +0800
Subject: [PATCH] atsr
---
/dev/null | 71 -----------------------------------
.gitignore | 1
funasr/utils/load_utils.py | 2 -
examples/industrial_data_pretraining/lcbnet/demo.py | 2
examples/industrial_data_pretraining/lcbnet/demo.sh | 10 ++--
5 files changed, 7 insertions(+), 79 deletions(-)
diff --git a/.gitignore b/.gitignore
index d2b4c53..1f2a3d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,4 +26,5 @@
GPT-SoVITS*
examples/*/*/outputs
examples/*/*/exp
+examples/*/*/tmp
cmd_read
diff --git a/examples/industrial_data_pretraining/lcbnet/demo.py b/examples/industrial_data_pretraining/lcbnet/demo.py
index d0870bc..602a986 100755
--- a/examples/industrial_data_pretraining/lcbnet/demo.py
+++ b/examples/industrial_data_pretraining/lcbnet/demo.py
@@ -10,7 +10,7 @@
# example1
-res = model.generate(input='["~/.cache/modelscope/hub/iic/LCB-NET/example/asr_example.wav","~/.cache/modelscope/hub/iic/LCB-NET/example/ocr.txt"]',data_type='["sound", "text"]')
+res = model.generate(input=("https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/asr_example.wav","https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/ocr.txt"),data_type=("sound", "text"))
print(res)
diff --git a/examples/industrial_data_pretraining/lcbnet/demo.sh b/examples/industrial_data_pretraining/lcbnet/demo.sh
index 8252891..3e04ccd 100755
--- a/examples/industrial_data_pretraining/lcbnet/demo.sh
+++ b/examples/industrial_data_pretraining/lcbnet/demo.sh
@@ -1,5 +1,5 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+file_dir="/home/yf352572/.cache/modelscope/hub/iic/LCB-NET/"
+CUDA_VISIBLE_DEVICES="0,1"
inference_device="cuda"
if [ ${inference_device} == "cuda" ]; then
@@ -12,7 +12,7 @@
done
fi
-inference_dir="outputs/slidespeech_dev_beamsearch_new"
+inference_dir="outputs/slidespeech_dev"
_logdir="${inference_dir}/logdir"
echo "inference_dir: ${inference_dir}"
@@ -39,11 +39,11 @@
python -m funasr.bin.inference \
--config-path=${file_dir} \
--config-name="config.yaml" \
- ++init_param=${file_dir}/model.pb \
+ ++init_param=${file_dir}/model.pt \
++tokenizer_conf.token_list=${file_dir}/tokens.txt \
++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
+data_type='["kaldi_ark", "text"]' \
- ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
+ ++tokenizer_conf.bpemodel=${file_dir}/bpe.pt \
++output_dir="${inference_dir}/${JOB}" \
++device="${inference_device}" \
++ncpu=1 \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo2.sh b/examples/industrial_data_pretraining/lcbnet/demo2.sh
deleted file mode 100755
index 69df6d1..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo2.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-test_set="dev_wav"
-if [ ${inference_device} == "cuda" ]; then
- nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
- inference_batch_size=1
- CUDA_VISIBLE_DEVICES=""
- for JOB in $(seq ${nj}); do
- CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
- done
-fi
-
-inference_dir="outputs/slidespeech_dev_beamsearch_wav"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/${test_set}/wav.scp
-key_file2=${file_dir}/${test_set}/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
- split_scps1+=" ${_logdir}/wav.${JOB}.scp"
- split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
- {
- id=$((JOB-1))
- gpuid=${gpuid_list_array[$id]}
-
- export CUDA_VISIBLE_DEVICES=${gpuid}
-
- python -m funasr.bin.inference \
- --config-path=${file_dir} \
- --config-name="config.yaml" \
- ++init_param=${file_dir}/model.pb \
- ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
- ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
- +data_type='["sound", "text"]' \
- ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
- ++output_dir="${inference_dir}/${JOB}" \
- ++device="${inference_device}" \
- ++ncpu=1 \
- ++disable_log=true &> ${_logdir}/log.${JOB}.txt
-
- }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
- cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/鈻�/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref
-cp ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh b/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh
deleted file mode 100755
index da6ad68..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo2_tmp.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-test_set="test_wav"
-if [ ${inference_device} == "cuda" ]; then
- nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
- inference_batch_size=1
- CUDA_VISIBLE_DEVICES=""
- for JOB in $(seq ${nj}); do
- CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
- done
-fi
-
-inference_dir="outputs/slidespeech_test_beamsearch_wav"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/${test_set}/wav.scp
-key_file2=${file_dir}/${test_set}/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
- split_scps1+=" ${_logdir}/wav.${JOB}.scp"
- split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
- {
- id=$((JOB-1))
- gpuid=${gpuid_list_array[$id]}
-
- export CUDA_VISIBLE_DEVICES=${gpuid}
-
- python -m funasr.bin.inference \
- --config-path=${file_dir} \
- --config-name="config.yaml" \
- ++init_param=${file_dir}/model.pb \
- ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
- ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
- +data_type='["sound", "text"]' \
- ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
- ++output_dir="${inference_dir}/${JOB}" \
- ++device="${inference_device}" \
- ++ncpu=1 \
- ++disable_log=true &> ${_logdir}/log.${JOB}.txt
-
- }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
- cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/鈻�/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp ${file_dir}/${test_set}/text ${inference_dir}/1best_recog/token.ref
-cp ${file_dir}/${test_set}/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh
deleted file mode 100755
index 0747a8d..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_pdb.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-
-#CUDA_VISIBLE_DEVICES="" \
-python -m funasr.bin.inference \
---config-path=${file_dir} \
---config-name="config.yaml" \
-++init_param=${file_dir}/model.pb \
-++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-+input=["${file_dir}/example/asr_example.wav","${file_dir}/example/ocr.txt"] \
-+data_type='["sound","text"]' \
-++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-++output_dir="./outputs/debug" \
-++device="cpu" \
-
-#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav"] \
-#+data_type='["sound"]' \
-#++input=["/nfs/yufan.yf/workspace/espnet/egs2/youtube_ppt/asr/dump/raw/dev_oracle_v1_new/data/format.1/YTB+--tMoLpQI-w+00322.wav","/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch/example/ocr2.txt"] \
-#+data_type='["sound","text"]' \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh b/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh
deleted file mode 100755
index 557e9b2..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_pdb2.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-
-#CUDA_VISIBLE_DEVICES="" \
-python -m funasr.bin.inference \
---config-path=${file_dir} \
---config-name="config.yaml" \
-++init_param=${file_dir}/model.pb \
-++tokenizer_conf.token_list=${file_dir}/tokens.txt \
-++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \
-+data_type='["sound", "text"]' \
-++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
-++output_dir="./outputs/debug" \
-++device="cpu" \
-
-#++input=[${file_dir}/dev_wav/wav.scp,${file_dir}/dev_wav/ocr.txt] \
diff --git a/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh b/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh
deleted file mode 100755
index 488f7d2..0000000
--- a/examples/industrial_data_pretraining/lcbnet/demo_tmp1.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-file_dir="/nfs/yufan.yf/workspace/github/FunASR/examples/industrial_data_pretraining/lcbnet/exp/speech_lcbnet_contextual_asr-en-16k-bpe-vocab5002-pytorch"
-CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
-inference_device="cuda"
-
-if [ ${inference_device} == "cuda" ]; then
- nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-else
- inference_batch_size=1
- CUDA_VISIBLE_DEVICES=""
- for JOB in $(seq ${nj}); do
- CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"-1,"
- done
-fi
-
-inference_dir="outputs/slidespeech_test_beamsearch_new"
-_logdir="${inference_dir}/logdir"
-echo "inference_dir: ${inference_dir}"
-
-mkdir -p "${_logdir}"
-key_file1=${file_dir}/test/wav.scp
-key_file2=${file_dir}/test/ocr.txt
-split_scps1=
-split_scps2=
-for JOB in $(seq "${nj}"); do
- split_scps1+=" ${_logdir}/wav.${JOB}.scp"
- split_scps2+=" ${_logdir}/ocr.${JOB}.txt"
-done
-utils/split_scp.pl "${key_file1}" ${split_scps1}
-utils/split_scp.pl "${key_file2}" ${split_scps2}
-
-gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
-for JOB in $(seq ${nj}); do
- {
- id=$((JOB-1))
- gpuid=${gpuid_list_array[$id]}
-
- export CUDA_VISIBLE_DEVICES=${gpuid}
-
- python -m funasr.bin.inference \
- --config-path=${file_dir} \
- --config-name="config.yaml" \
- ++init_param=${file_dir}/model.pb \
- ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
- ++input=[${_logdir}/wav.${JOB}.scp,${_logdir}/ocr.${JOB}.txt] \
- +data_type='["kaldi_ark", "text"]' \
- ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \
- ++output_dir="${inference_dir}/${JOB}" \
- ++device="${inference_device}" \
- ++ncpu=1 \
- ++disable_log=true &> ${_logdir}/log.${JOB}.txt
-
- }&
-done
-wait
-
-
-mkdir -p ${inference_dir}/1best_recog
-
-for JOB in $(seq "${nj}"); do
- cat "${inference_dir}/${JOB}/1best_recog/token" >> "${inference_dir}/1best_recog/token"
-done
-
-echo "Computing WER ..."
-sed -e 's/ /\t/' -e 's/ //g' -e 's/鈻�/ /g' -e 's/\t /\t/' ${inference_dir}/1best_recog/token > ${inference_dir}/1best_recog/token.proc
-cp ${file_dir}/test/text ${inference_dir}/1best_recog/token.ref
-cp ${file_dir}/test/ocr.list ${inference_dir}/1best_recog/ocr.list
-python utils/compute_wer.py ${inference_dir}/1best_recog/token.ref ${inference_dir}/1best_recog/token.proc ${inference_dir}/1best_recog/token.cer
-tail -n 3 ${inference_dir}/1best_recog/token.cer
-
-./run_bwer_recall.sh ${inference_dir}/1best_recog/
-tail -n 6 ${inference_dir}/1best_recog/BWER-UWER.results |head -n 5
diff --git a/funasr/utils/load_utils.py b/funasr/utils/load_utils.py
index 644af23..84c38f9 100644
--- a/funasr/utils/load_utils.py
+++ b/funasr/utils/load_utils.py
@@ -89,8 +89,6 @@
return array
def extract_fbank(data, data_len = None, data_type: str="sound", frontend=None, **kwargs):
- # import pdb;
- # pdb.set_trace()
if isinstance(data, np.ndarray):
data = torch.from_numpy(data)
if len(data.shape) < 2:
--
Gitblit v1.9.1