python/FunASR-XL.git

FUNASR训练

parent: 7ee71675 | 补丁 | 提交 | show whitespace

speech_asr

2023-03-16 c3bce4c288f73a3bbf5559b019d4480f95acffaa

update

3个文件已修改

	funasr/bin/eend_ola_inference.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/modules/eend_ola/encoder.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	setup.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史

 funasr/bin/eend_ola_inference.py

@@ -209,7 +209,7 @@
        if data_path_and_name_and_type is None and raw_inputs is not None:
            if isinstance(raw_inputs, torch.Tensor):
                raw_inputs = raw_inputs.numpy()
-           data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"]
+           data_path_and_name_and_type = [raw_inputs[0], "speech", "sound"]
        loader = EENDOLADiarTask.build_streaming_iterator(
            data_path_and_name_and_type,
            dtype=dtype,

 funasr/modules/eend_ola/encoder.py

@@ -87,7 +87,7 @@
                 n_layers: int,
                 n_units: int,
                 e_units: int = 2048,
-                h: int = 8,
+                h: int = 4,
                 dropout_rate: float = 0.1,
                 use_pos_emb: bool = False):
        super(EENDOLATransformerEncoder, self).__init__()

 setup.py

@@ -17,7 +17,7 @@
        "humanfriendly",
        "scipy>=1.4.1",
        # "filelock",
-       "librosa>=0.8.0",
+       "librosa==0.8.1",
        "jamo==0.4.1",  # For kss
        "PyYAML>=5.1.2",
        "soundfile>=0.10.2",

			@@ -209,7 +209,7 @@
			if data_path_and_name_and_type is None and raw_inputs is not None:
			if isinstance(raw_inputs, torch.Tensor):
			raw_inputs = raw_inputs.numpy()
			data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"]
			data_path_and_name_and_type = [raw_inputs[0], "speech", "sound"]
			loader = EENDOLADiarTask.build_streaming_iterator(
			data_path_and_name_and_type,
			dtype=dtype,

			@@ -87,7 +87,7 @@
			n_layers: int,
			n_units: int,
			e_units: int = 2048,
			h: int = 8,
			h: int = 4,
			dropout_rate: float = 0.1,
			use_pos_emb: bool = False):
			super(EENDOLATransformerEncoder, self).__init__()

			@@ -17,7 +17,7 @@
			"humanfriendly",
			"scipy>=1.4.1",
			# "filelock",
			"librosa>=0.8.0",
			"librosa==0.8.1",
			"jamo==0.4.1", # For kss
			"PyYAML>=5.1.2",
			"soundfile>=0.10.2",