hnluo
2023-09-11 9fcb3cc06b4e324f0913d2f61b89becc2baeef1b
egs/aishell2/transformerLM/run.sh
@@ -93,25 +93,6 @@
        exit 2
    fi
    ## use_word_lm=false
    ## # Create word-list for word-LM training
    ## if ${use_word_lm} && [ "${token_type}" != word ]; then
    ##     echo "Generate word level token_list from ${lm_train_text}"
    ##     python -m funasr.bin.tokenize_text \
    ##         --token_type word \
    ##         --input "${lm_train_text}" \
    ##         --output "${token_list}" \
    ##         --field 2- \
    ##         --cleaner "${cleaner}" \
    ##         --g2p "${g2p}" \
    ##         --write_vocabulary true \
    ##         --vocabulary_size "${word_vocab_size}" \
    ##         --add_symbol "${blank}:0" \
    ##         --add_symbol "${sos}:1" \
    ##         --add_symbol "${eos}:2" \
    ##         --add_symbol "${oov}:-1"
    ## fi
    lm_token_list="${token_list}"
fi
@@ -232,14 +213,16 @@
# Stage 3: compute language-model perplexity over ${lm_test_text}.
# NOTE(review): the previous revision invoked lm_inference.py directly; the
# stale command head had been left chained into the new one via a trailing
# backslash, producing a single malformed command line. Only the
# lm_inference_launch.py invocation is kept here.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "Stage 3: Calc perplexity: ${lm_test_text}"

    python ../../../funasr/bin/lm_inference_launch.py \
        --output_dir "${lm_exp}/perplexity_test/output.1" \
        --ngpu "${gpu_num}" \
        --batch_size 1 \
        --train_config "${lm_exp}"/config.yaml \
        --model_file "${lm_exp}/${inference_lm}" \
        --data_path_and_name_and_type "${lm_test_text},text,text" \
        --num_workers 1 \
        --gpuid_list 0 \
        --mode "transformer" \
        --split_with_space false
fi