python/FunASR-XL.git

			@@ -93,25 +93,6 @@
			exit 2
			fi

			## use_word_lm=false
			## # Create word-list for word-LM training
			## if ${use_word_lm} && [ "${token_type}" != word ]; then
			## echo "Generate word level token_list from ${lm_train_text}"
			## python -m funasr.bin.tokenize_text \
			## --token_type word \
			## --input "${lm_train_text}" \
			## --output "${token_list}" \
			## --field 2- \
			## --cleaner "${cleaner}" \
			## --g2p "${g2p}" \
			## --write_vocabulary true \
			## --vocabulary_size "${word_vocab_size}" \
			## --add_symbol "${blank}:0" \
			## --add_symbol "${sos}:1" \
			## --add_symbol "${eos}:2" \
			## --add_symbol "${oov}:-1"
			## fi

			lm_token_list="${token_list}"

			fi
			@@ -232,14 +213,16 @@
			# Stage 3: perplexity calculation on ${lm_test_text}.
			# Executed only when stage <= 3 <= stop_stage.
			#
			# The model checkpoint "${lm_exp}/${inference_lm}" and its training config
			# "${lm_exp}/config.yaml" are handed to lm_inference_launch.py, which writes
			# under "${lm_exp}/perplexity_test/output.1".
			#
			# Exactly one interpreter invocation must be present here: every argument
			# line ends in a backslash, so any extra "python <script> \" line placed
			# above would be merged into this same command line instead of running
			# on its own.
			#
			# NOTE(review): --gpuid_list is fixed to 0 while --ngpu follows ${gpu_num};
			# confirm this is intended when gpu_num is not 1.
			if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
				echo "Stage 3: Calc perplexity: ${lm_test_text}"

				python ../../../funasr/bin/lm_inference_launch.py \
					--output_dir "${lm_exp}/perplexity_test/output.1" \
					--ngpu "${gpu_num}" \
					--batch_size 1 \
					--train_config "${lm_exp}"/config.yaml \
					--model_file "${lm_exp}/${inference_lm}" \
					--data_path_and_name_and_type "${lm_test_text},text,text" \
					--num_workers 1 \
					--gpuid_list 0 \
					--mode "transformer" \
					--split_with_space false
			fi