嘉渊
2023-07-19 f5bd371837cc3b89e6d387ecc84469a0e513fbd6
update
4个文件已修改
36 ■■■■■ 已修改文件
egs/callhome/eend_ola/local/infer.py 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/run.sh 24 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/run_test.sh 5 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/e2e_diar_eend_ola.py 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/local/infer.py
@@ -54,7 +54,7 @@
    parser.add_argument(
        "--sampling_rate",
        type=int,
-        default=10,
+        default=8000,
        help="sampling rate",
    )
    parser.add_argument(
@@ -104,7 +104,7 @@
    print("Start inference")
    with open(args.output_rttm_file, "w") as wf:
        for wav_id in wav_items.keys():
-            print("Process wav: {}\n".format(wav_id))
+            print("Process wav: {}".format(wav_id))
            data, rate = sf.read(wav_items[wav_id])
            speech = eend_ola_feature.stft(data, args.frame_size, args.frame_shift)
            speech = eend_ola_feature.transform(speech)
egs/callhome/eend_ola/run.sh
@@ -245,13 +245,17 @@
    python local/model_averaging.py ${exp_dir}/exp/${callhome_model_dir}/$callhome_ave_id.pb $models
fi
## inference
#if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
#    echo "Inference"
#    mkdir -p ${exp_dir}/exp/${callhome_model_dir}/inference/log
#    CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python local/infer.py \
#        --config_file ${exp_dir}/exp/${callhome_model_dir}/config.yaml \
#        --model_file ${exp_dir}/exp/${callhome_model_dir}/$callhome_ave_id.pb \
#        --output_rttm_file ${exp_dir}/exp/${callhome_model_dir}/inference/rttm \
#        --wav_scp_file ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/${callhome2_wav_scp_file} 1> ${exp_dir}/exp/${callhome_model_dir}/inference/log/infer.log 2>&1
#fi
# inference and compute DER
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    echo "Inference"
    mkdir -p ${exp_dir}/exp/${callhome_model_dir}/inference/log
    CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES python local/infer.py \
        --config_file ${exp_dir}/exp/${callhome_model_dir}/config.yaml \
        --model_file ${exp_dir}/exp/${callhome_model_dir}/$callhome_ave_id.pb \
        --output_rttm_file ${exp_dir}/exp/${callhome_model_dir}/inference/rttm \
        --wav_scp_file ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/${callhome2_wav_scp_file} \
        1> ${exp_dir}/exp/${callhome_model_dir}/inference/log/infer.log 2>&1
    md-eval.pl -c 0.25 \
          -r ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/rttm \
          -s ${exp_dir}/exp/${callhome_model_dir}/inference/rttm > ${exp_dir}/exp/${callhome_model_dir}/inference/result_med11_collar0.25 2>/dev/null || exit
fi
egs/callhome/eend_ola/run_test.sh
@@ -245,7 +245,7 @@
    python local/model_averaging.py ${exp_dir}/exp/${callhome_model_dir}/$callhome_ave_id.pb $models
fi
-# inference
+# inference and compute DER
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    echo "Inference"
    mkdir -p ${exp_dir}/exp/${callhome_model_dir}/inference/log
@@ -255,4 +255,7 @@
        --output_rttm_file ${exp_dir}/exp/${callhome_model_dir}/inference/rttm \
        --wav_scp_file ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/${callhome2_wav_scp_file} \
        1> ${exp_dir}/exp/${callhome_model_dir}/inference/log/infer.log 2>&1
    md-eval.pl -c 0.25 \
          -r ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/rttm \
          -s ${exp_dir}/exp/${callhome_model_dir}/inference/rttm > ${exp_dir}/exp/${callhome_model_dir}/inference/result_med11_collar0.25 2>/dev/null || exit
fi
funasr/models/e2e_diar_eend_ola.py
@@ -157,12 +157,11 @@
    def estimate_sequential(self,
                            speech: torch.Tensor,
                            speech_lengths: torch.Tensor,
                            n_speakers: int = None,
                            shuffle: bool = True,
                            threshold: float = 0.5,
                            **kwargs):
        speech = [s[:s_len] for s, s_len in zip(speech, speech_lengths)]
        speech_lengths = torch.tensor([len(sph) for sph in speech]).to(torch.int64)
        emb = self.forward_encoder(speech, speech_lengths)
        if shuffle:
            orders = [np.arange(e.shape[0]) for e in emb]