游雁
2024-02-21 4cf44a89f808411a0616c8ed92c3afae3d3e371a
bugfix
8个文件已修改
31 ■■■■■ 已修改文件
examples/aishell/branchformer/run.sh 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/conformer/run.sh 7 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/e_branchformer/run.sh 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/paraformer/run.sh 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/transformer/run.sh 7 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/industrial_data_pretraining/bicif_paraformer/demo.py 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/auto/auto_model.py 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/train_utils/trainer.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/branchformer/run.sh
@@ -109,6 +109,7 @@
  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  torchrun \
  --nnodes 1 \
examples/aishell/conformer/run.sh
@@ -5,7 +5,7 @@
# general configuration
feats_dir="../DATA" #feature output directory
exp_dir="."
exp_dir=`pwd`
lang=zh
token_type=char
stage=0
@@ -109,6 +109,7 @@
  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  torchrun \
  --nnodes 1 \
@@ -129,7 +130,7 @@
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
  echo "stage 5: Inference"
  if ${inference_device} == "cuda"; then
  if [ ${inference_device} == "cuda" ]; then
      nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  else
      inference_batch_size=1
@@ -170,7 +171,7 @@
          ++input="${_logdir}/keys.${JOB}.scp" \
          ++output_dir="${inference_dir}/${JOB}" \
          ++device="${inference_device}" \
          ++batch_size="${inference_batch_size}"
          ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt
        }&
    done
examples/aishell/e_branchformer/run.sh
@@ -109,6 +109,7 @@
  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  torchrun \
  --nnodes 1 \
examples/aishell/paraformer/run.sh
@@ -109,6 +109,7 @@
  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  torchrun \
  --nnodes 1 \
examples/aishell/transformer/run.sh
@@ -109,6 +109,7 @@
  log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  export CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  torchrun \
  --nnodes 1 \
@@ -129,7 +130,7 @@
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
  echo "stage 5: Inference"
  if ${inference_device} == "cuda"; then
  if [ ${inference_device} == "cuda" ]; then
      nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
  else
      inference_batch_size=1
@@ -141,7 +142,7 @@
  for dset in ${test_sets}; do
    inference_dir="${exp_dir}/exp/${model_dir}/${inference_checkpoint}/${dset}"
    inference_dir="${exp_dir}/exp/${model_dir}/infer-${inference_checkpoint}/${dset}"
    _logdir="${inference_dir}/logdir"
    mkdir -p "${_logdir}"
@@ -154,7 +155,7 @@
    done
    utils/split_scp.pl "${key_file}" ${split_scps}
    gpuid_list_array=(${gpuid_list//,/ })
    gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ })
    for JOB in $(seq ${nj}); do
        {
          id=$((JOB-1))
examples/industrial_data_pretraining/bicif_paraformer/demo.py
@@ -11,8 +11,8 @@
                  vad_model_revision="v2.0.4",
                  punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
                  punc_model_revision="v2.0.4",
                  spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
                  spk_model_revision="v2.0.4",
                  # spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
                  # spk_model_revision="v2.0.2",
                  )
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_vad_punc_example.wav", batch_size_s=300, batch_size_threshold_s=60)
funasr/auto/auto_model.py
@@ -400,20 +400,20 @@
                    for res, vadsegment in zip(restored_data, vadsegments):
                        sentence_list.append({"start": vadsegment[0],\
                                                "end": vadsegment[1],
                                                "sentence": res['raw_text'],
                                                "sentence": res['text'],
                                                "timestamp": res['timestamp']})
                elif self.spk_mode == 'punc_segment':
                    sentence_list = timestamp_sentence(punc_res[0]['punc_array'], \
                                                        result['timestamp'], \
                                                        result['raw_text'])
                                                        result['text'])
                distribute_spk(sentence_list, sv_output)
                result['sentence_info'] = sentence_list
            elif kwargs.get("sentence_timestamp", False):
                sentence_list = timestamp_sentence(punc_res[0]['punc_array'], \
                                                        result['timestamp'], \
                                                        result['raw_text'])
                                                        result['text'])
                result['sentence_info'] = sentence_list
            del result['spk_embedding']
            if "spk_embedding" in result: del result['spk_embedding']
                    
            result["key"] = key
            results_ret_list.append(result)
funasr/train_utils/trainer.py
@@ -279,7 +279,7 @@
                    f"epoch: {epoch}/{self.max_epoch}, "
                    f"step: {batch_idx+1}/{len(self.dataloader_train)}, total: {self.batch_total}, "
                    f"(loss: {loss.detach().cpu().item():.3f}), "
                    f"(lr: {lr}), "
                    f"(lr: {lr:.3e}), "
                    f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, "
                    f"{speed_stats}, "
                    f"{gpu_info}"