python/FunASR-XL.git

			@@ -17,7 +17,7 @@
			diar_model_config="sond.yaml",
			model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
			sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
			sv_model_revision="master",
			sv_model_revision="v1.2.2",
			)

			# use audio_list as the input, where the first one is the record to be detected

			@@ -51,7 +51,7 @@

			```shell
			cd bin
			./funasr-wss-server [--model_thread_num <int>] [--decoder_thread_num <int>]
			./funasr-wss-server [--model_thread_num <int>] [--decoder_thread_num <int>]
			[--io_thread_num <int>] [--port <int>] [--listen_ip
			<string>] [--punc-quant <string>] [--punc-dir <string>]
			[--vad-quant <string>] [--vad-dir <string>] [--quantize
			@@ -85,7 +85,7 @@
			default: ../../../ssl_key/server.key, path of keyfile for WSS connection

			example:
			funasr-wss-server --model-dir /FunASR/funasr/runtime/onnxruntime/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
			./funasr-wss-server --model-dir /FunASR/funasr/runtime/onnxruntime/export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
			```

			## Run websocket client test

			@@ -19,30 +19,20 @@
			task=Tasks.speaker_verification,
			model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
			)
			# Extract speaker embeddings for the different utterances
			rec_result = inference_sv_pipline(
			audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav')
			enroll = rec_result["spk_embedding"]

			rec_result = inference_sv_pipline(
			audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav')
			same = rec_result["spk_embedding"]

			rec_result = inference_sv_pipline(
			audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav')
			different = rec_result["spk_embedding"]

			# Compute the cosine similarity for the same speaker
			sv_threshold = 0.9465
			same_cos = np.sum(enroll * same) / (np.linalg.norm(enroll) * np.linalg.norm(same))
			same_cos = max(same_cos - sv_threshold, 0.0) / (1.0 - sv_threshold) * 100.0
			logger.info("Similarity: {}".format(same_cos))

			# Compute the cosine similarity for a different speaker
			diff_cos = np.sum(enroll * different) / (np.linalg.norm(enroll) * np.linalg.norm(different))
			diff_cos = max(diff_cos - sv_threshold, 0.0) / (1.0 - sv_threshold) * 100.0
			logger.info("Similarity: {}".format(diff_cos))

			# the same speaker
			rec_result = inference_sv_pipline(audio_in=(
			'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
			'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav'))
			assert abs(rec_result["scores"][0]-0.85) < 0.1 and abs(rec_result["scores"][1]-0.14) < 0.1
			logger.info(f"Similarity {rec_result['scores']}")

			# different speaker
			rec_result = inference_sv_pipline(audio_in=(
			'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
			'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))
			assert abs(rec_result["scores"][0]-0.0) < 0.1 and abs(rec_result["scores"][1]-1.0) < 0.1
			logger.info(f"Similarity {rec_result['scores']}")

			if __name__ == '__main__':
			unittest.main()

	egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/websocket/readme.md	4 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	tests/test_sv_inference_pipeline.py	36 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史