python/FunASR-XL.git - Gitblit

python / FunASR-XL

FUNASR训练

blame | 历史 | 补丁 | 提交 | 提交对比 | ignore whitespace

Merge pull request #367 from alibaba-damo-academy/dev_lhn2

hnluo

2023-04-17 24f73665e2d8ea8e4de2fe4f900bc539d7f7b989

 funasr/datasets/iterable_dataset.py

@@ -8,6 +8,7 @@
from typing import Iterator
from typing import Tuple
from typing import Union
from typing import List

import kaldiio
import numpy as np
@@ -129,7 +130,7 @@
        non_iterable_list = []
        self.path_name_type_list = []

        if not isinstance(path_name_type_list[0], Tuple):
        if not isinstance(path_name_type_list[0], (Tuple, List)):
            path = path_name_type_list[0]
            name = path_name_type_list[1]
            _type = path_name_type_list[2]
@@ -227,13 +228,9 @@
                name = self.path_name_type_list[i][1]
                _type = self.path_name_type_list[i][2]
                if _type == "sound":
                    audio_type = os.path.basename(value).split(".")[-1].lower()
                    if audio_type not in SUPPORT_AUDIO_TYPE_SETS:
                        raise NotImplementedError(
                            f'Not supported audio type: {audio_type}')
                    if audio_type == "pcm":
                        _type = "pcm"

                   audio_type = os.path.basename(value).lower()
                   if audio_type.rfind(".pcm") >= 0:
                       _type = "pcm"
                func = DATA_TYPES[_type]
                array = func(value)
                if self.fs is not None and (name == "speech" or name == "ref_speech"):
@@ -243,10 +240,15 @@
                        array = torch.from_numpy(array)
                        array = torchaudio.transforms.Resample(orig_freq=audio_fs,
                                                               new_freq=model_fs)(array)
                if self.mc:
                    data[name] = array.transpose(0, 1).numpy()
                        array = array.numpy()
                        
                if _type == "sound":
                    if self.mc:
                        data[name] = array.transpose((1, 0))
                    else:
                        data[name] = array[0]
                else:
                    data[name] = array[0].numpy()
                    data[name] = array

                if self.preprocess is not None:
                    data = self.preprocess(uid, data)
@@ -330,11 +332,8 @@
                # 2.a. Load data streamingly
                for value, (path, name, _type) in zip(values, self.path_name_type_list):
                    if _type == "sound":
                        audio_type = os.path.basename(value).split(".")[-1].lower()
                        if audio_type not in SUPPORT_AUDIO_TYPE_SETS:
                            raise NotImplementedError(
                                f'Not supported audio type: {audio_type}')
                        if audio_type == "pcm":
                        audio_type = os.path.basename(value).lower()
                        if audio_type.rfind(".pcm") >= 0:
                            _type = "pcm"
                    func = DATA_TYPES[_type]
                    # Load entry
@@ -346,11 +345,12 @@
                            array = torch.from_numpy(array)
                            array = torchaudio.transforms.Resample(orig_freq=audio_fs,
                                                                   new_freq=model_fs)(array)
                            array = array.numpy()
                    if _type == "sound":
                        if self.mc:
                            data[name] = array.transpose(0, 1).numpy()
                            data[name] = array.transpose((1, 0))
                        else:
                            data[name] = array[0].numpy()
                            data[name] = array[0]
                    else:
                        data[name] = array
                if self.non_iterable_dataset is not None:
@@ -385,3 +385,4 @@

        if count == 0:
            raise RuntimeError("No iteration")