python/FunASR-XL.git

			@@ -1,6 +1,7 @@
			import os
			import torch
			import json
			from io import BytesIO
			import torch.distributed as dist
			import numpy as np
			import kaldiio
			@@ -9,6 +10,7 @@
			import time
			import logging
			from torch.nn.utils.rnn import pad_sequence
			from pydub import AudioSegment

			try:
			from funasr.download.file import download_from_url
			@@ -136,6 +138,7 @@


			def load_bytes(input):
			input = validate_frame_rate(input)
			middle_data = np.frombuffer(input, dtype=np.int16)
			middle_data = np.asarray(middle_data)
			if middle_data.dtype.kind not in "iu":
			@@ -150,6 +153,31 @@
			array = np.frombuffer((middle_data.astype(dtype) - offset) / abs_max, dtype=np.float32)
			return array

			def validate_frame_rate(
			    input,
			    fs: int = 16000,
			):
			    """Decode an in-memory audio payload and return it as WAV bytes at ``fs`` Hz.

			    The payload is wrapped in a ``BytesIO`` and decoded with
			    ``AudioSegment.from_file``. If the decoded frame rate differs from
			    ``fs`` the segment is resampled with ``set_frame_rate``. The segment is
			    then exported with ``format="wav"`` into a fresh in-memory buffer,
			    whether or not resampling was needed.

			    Args:
			        input: Encoded audio content, passed directly to ``BytesIO``.
			        fs: Target frame rate in Hz. Defaults to 16000.

			    Returns:
			        The bytes written by ``AudioSegment.export(..., format="wav")``.

			    NOTE(review): the result is a WAV export, so it presumably starts with
			    a container header rather than bare PCM samples. ``load_bytes`` feeds
			    this value to ``np.frombuffer(..., dtype=np.int16)`` -- confirm whether
			    the header should be stripped before that call.
			    """
			    # Decode straight from memory; no temporary file is involved.
			    segment = AudioSegment.from_file(BytesIO(input))

			    # Resample only when the decoded rate is not already the target rate.
			    if segment.frame_rate != fs:
			        segment = segment.set_frame_rate(fs)

			    # Serialise the (possibly resampled) audio back into an in-memory WAV.
			    wav_buffer = BytesIO()
			    segment.export(wav_buffer, format="wav")

			    # getvalue() yields the whole buffer regardless of the stream position.
			    return wav_buffer.getvalue()


			def extract_fbank(data, data_len=None, data_type: str = "sound", frontend=None, **kwargs):
			if isinstance(data, np.ndarray):