python/FunASR-XL.git

#!/usr/bin/env bash
 
. ./path.sh || exit 1;
 
# machines configuration
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
gpu_num=8
count=1
gpu_inference=true  # Whether to perform gpu decoding, set false for cpu decoding
# for gpu decoding, inference_nj=ngpu*njob; for cpu decoding, inference_nj=njob
njob=5
train_cmd=utils/run.pl
infer_cmd=utils/run.pl
 
# general configuration
feats_dir="../DATA" #feature output dictionary
exp_dir="."
lang=zh
token_type=char
type=sound
scp=wav.scp
speed_perturb="0.9 1.0 1.1"
stage=1
stop_stage=5
 
# feature configuration
feats_dim=80
nj=64
 
# data
raw_data=/nfs/zhifu.gzf/wenetspeech_proc
 
# exp tag
tag="exp1"
 
. utils/parse_options.sh || exit 1;
 
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
set -u
set -o pipefail
 
train_set=train_l
valid_set=dev
test_sets="dev test_net test_meeting"
 
asr_config=conf/train_asr_conformer.yaml
model_dir="baseline_$(basename "${asr_config}" .yaml)_${lang}_${token_type}_${tag}"
 
inference_config=conf/decode_asr_transformer_5beam.yaml
inference_asr_model=valid.acc.ave_10best.pb
 
# you can set gpu num for decoding here
gpuid_list=$CUDA_VISIBLE_DEVICES  # set gpus for decoding, the same as training stage by default
ngpu=$(echo $gpuid_list | awk -F "," '{print NF}')
 
if ${gpu_inference}; then
    inference_nj=$[${ngpu}*${njob}]
    _ngpu=1
else
    inference_nj=$njob
    _ngpu=0
fi
 
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    echo "For downloading data, please refer to https://github.com/wenet-e2e/WenetSpeech."
    exit 0;
fi
 
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "stage 0: Data preparation"
    # Data preparation
    local/wenetspeech_data_prep.sh $raw_data $feats_dir
    mkdir $feats_dir/data
    mv $feats_dir/$train_set $feats_dir/data/$train_set
    for x in $test_sets; do
        mv $feats_dir/$x $feats_dir/data/
    done
fi
 
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "stage 1: Feature and CMVN Generation"
    utils/compute_cmvn.sh ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config "$asr_config" --scale 0.1
fi