zhifu gao
2024-03-28 08a318b72f1a0229a6de84950690b2aac69c9ec2
Dev gzf new (#1557)

* train

* train

* train

* train

* train

* train

* train

* train

* train

* train

* train

* train

* train

* train
2个文件已修改
8 ■■■■ 已修改文件
funasr/datasets/audio_datasets/espnet_samplers.py 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/datasets/audio_datasets/samplers.py 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/datasets/audio_datasets/espnet_samplers.py
@@ -56,8 +56,8 @@
        self.shuffle = shuffle and is_training
        self.drop_last = drop_last
-        # self.total_size = len(self.dataset)
-        # self.num_samples = int(math.ceil(self.total_size / self.num_replicas))
+        self.total_size = len(self.dataset)
+        self.num_samples = int(math.ceil(self.total_size / self.num_replicas))
        self.epoch = 0
        self.sort_size = sort_size * num_replicas
        self.max_token_length = kwargs.get("max_token_length", 2048)
funasr/datasets/audio_datasets/samplers.py
@@ -323,7 +323,7 @@
        self.shuffle = shuffle and is_training
        self.drop_last = drop_last
        
-        # self.total_size = len(self.dataset)
+        self.total_size = len(self.dataset)
        self.num_samples = int(math.ceil(self.total_size / self.num_replicas))
        self.epoch = 0
        self.sort_size = sort_size * num_replicas
@@ -350,7 +350,7 @@
            max_len_in_batch = 0
            for idx in buffer:
                original_sample_length = self.dataset.get_source_len(idx)
-                if original_sample_length > self.max_sample_length:
+                if original_sample_length > self.max_token_length:
                    continue
                sample_length = 1 if self.batch_type == "example" else original_sample_length
                potential_batch_length = max(max_len_in_batch, sample_length) * (len(batch) + 1)