querryton
2024-04-20 01df8f330ccc754223d5e2d688dc0a55d27f2dcc
funasr/train_utils/average_nbest_models.py
@@ -23,13 +23,14 @@
     in the output directory.
     """
     try:
-        checkpoint = torch.load(os.path.exists(os.path.join(output_dir, "model.pt")), map_location="cpu")
+        checkpoint = torch.load(os.path.join(output_dir, "model.pt"), map_location="cpu")
         avg_keep_nbest_models_type = checkpoint["avg_keep_nbest_models_type"]
         val_step_or_eoch = checkpoint[f"val_{avg_keep_nbest_models_type}_step_or_eoch"]
-        sorted_items = sorted(saved_ckpts.items(), key=lambda x: x[1], reverse=True)
+        sorted_items = sorted(val_step_or_eoch.items(), key=lambda x: x[1], reverse=True)
+        sorted_items = sorted_items[:last_n] if avg_keep_nbest_models_type == "acc" else sorted_items[-last_n:]
         checkpoint_paths = [os.path.join(output_dir, key) for key, value in sorted_items[:last_n]]
     except:
         print(f"{checkpoint} does not exist, avg the lastet checkpoint.")
         # List all files in the output directory
         files = os.listdir(output_dir)
         # Filter out checkpoint files and extract epoch numbers
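
The hunk above fixes the model.pt load (the removed line passed the boolean result of os.path.exists to torch.load) and sorts the n-best bookkeeping dict instead of the undefined saved_ckpts, keeping the front of the descending sort for accuracy-style metrics and the tail for loss-style ones. A minimal, runnable sketch of that selection step, not part of the commit, with made-up checkpoint names and metric values:

# Not part of the commit: a sketch of the n-best selection above.
# In FunASR the dict would come from
# checkpoint[f"val_{avg_keep_nbest_models_type}_step_or_eoch"] in model.pt;
# the checkpoint names and values here are made up.
val_metric_by_ckpt = {
    "model.pt.ep1": 0.88,
    "model.pt.ep3": 0.91,
    "model.pt.ep4": 0.93,
    "model.pt.ep5": 0.94,
}
last_n = 2
avg_keep_nbest_models_type = "acc"  # "acc" keeps the largest values, anything else the smallest

# Sort descending by metric; slice the front for accuracy, the back for loss-like metrics.
sorted_items = sorted(val_metric_by_ckpt.items(), key=lambda x: x[1], reverse=True)
selected = sorted_items[:last_n] if avg_keep_nbest_models_type == "acc" else sorted_items[-last_n:]
print(selected)  # [('model.pt.ep5', 0.94), ('model.pt.ep4', 0.93)]

Because the sort is descending, the front slice holds the largest accuracies and the back slice the smallest losses, so both metric directions end up with the n best checkpoints.
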
@@ -56,10 +57,9 @@
             state_dicts.append(torch.load(path, map_location='cpu')['state_dict'])
         else:
             print(f"Checkpoint file {path} not found.")
             continue
     # Check if we have any state_dicts to average
-    if not state_dicts:
+    if len(state_dicts) < 1:
         raise RuntimeError("No checkpoints found for averaging.")
     # Average or sum weights
@@ -75,6 +75,6 @@
             # Perform average for other types of tensors
             stacked_tensors = torch.stack(tensors)
             avg_state_dict[key] = torch.mean(stacked_tensors, dim=0)
-    torch.save({'state_dict': avg_state_dict}, os.path.join(output_dir, f"model.pt.avg{last_n}"))
-    return avg_state_dict
+    checkpoint_outpath = os.path.join(output_dir, f"model.pt.avg{last_n}")
+    torch.save({'state_dict': avg_state_dict}, checkpoint_outpath)
+    return checkpoint_outpath
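
The final hunk changes the contract: rather than returning the averaged state_dict, the function saves it to model.pt.avg{last_n} and returns that path. A hedged, self-contained sketch of the stack-and-mean averaging plus loading from the returned path; the two tiny state_dicts and the output filename are hypothetical stand-ins for real checkpoints:

# Not part of the commit: a runnable sketch of the averaging and the new return value.
import torch

state_dicts = [
    {"w": torch.tensor([1.0, 2.0]), "b": torch.tensor([0.0])},
    {"w": torch.tensor([3.0, 4.0]), "b": torch.tensor([2.0])},
]

avg_state_dict = {}
for key in state_dicts[0]:
    # Stack each parameter along a new leading dim and average it away,
    # mirroring torch.stack / torch.mean in the hunk above.
    stacked = torch.stack([sd[key] for sd in state_dicts])
    avg_state_dict[key] = torch.mean(stacked, dim=0)

# Hypothetical output path, mirroring f"model.pt.avg{last_n}" with last_n == 2.
checkpoint_outpath = "model.pt.avg2"
torch.save({"state_dict": avg_state_dict}, checkpoint_outpath)

# Callers now receive the path and load the averaged weights themselves.
reloaded = torch.load(checkpoint_outpath, map_location="cpu")["state_dict"]
print(reloaded["w"])  # tensor([2., 3.])

Returning the path lets callers point downstream loading or inference code at the saved file instead of holding onto the in-memory dict.
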