游雁
2024-02-20 2e8dc0933f31bf449ecc11ac1b4dc1833fdaad42
train finetune
5个文件已修改
20 ■■■■■ 已修改文件
examples/aishell/branchformer/run.sh 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/e_branchformer/run.sh 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/paraformer/run.sh 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/transformer/run.sh 3 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/train_utils/trainer.py 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/aishell/branchformer/run.sh
@@ -105,7 +105,8 @@
  echo "stage 4: ASR Training"
  mkdir -p ${exp_dir}/exp/${model_dir}
- log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+ current_time=$(date "+%Y-%m-%d_%H-%M")
+ log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
examples/aishell/e_branchformer/run.sh
@@ -105,7 +105,8 @@
  echo "stage 4: ASR Training"
  mkdir -p ${exp_dir}/exp/${model_dir}
- log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+ current_time=$(date "+%Y-%m-%d_%H-%M")
+ log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
examples/aishell/paraformer/run.sh
@@ -105,7 +105,8 @@
  echo "stage 4: ASR Training"
  mkdir -p ${exp_dir}/exp/${model_dir}
- log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+ current_time=$(date "+%Y-%m-%d_%H-%M")
+ log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
examples/aishell/transformer/run.sh
@@ -105,7 +105,8 @@
  echo "stage 4: ASR Training"
  mkdir -p ${exp_dir}/exp/${model_dir}
- log_file="${exp_dir}/exp/${model_dir}/train.log.txt"
+ current_time=$(date "+%Y-%m-%d_%H-%M")
+ log_file="${exp_dir}/exp/${model_dir}/train.log.txt.${current_time}"
  echo "log_file: ${log_file}"
  gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
funasr/train_utils/trainer.py
@@ -188,7 +188,7 @@
            epoch (int): The current epoch number.
        """
        self.model.train()
-        pbar = tqdm(colour="blue", desc=f"Training Epoch: {epoch + 1}", total=len(self.dataloader_train),
+        pbar = tqdm(colour="blue", desc=f"rank: {self.local_rank}, Training Epoch: {epoch + 1}", total=len(self.dataloader_train),
                    dynamic_ncols=True)
        
        # Set the number of steps for gradient accumulation
@@ -278,7 +278,7 @@
                    f"epoch: {epoch}/{self.max_epoch}, "
                    f"step: {batch_idx}/{len(self.dataloader_train)}, total: {self.batch_total}, "
                    f"(loss: {loss.detach().cpu().item():.3f}), "
-                    f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}"
+                    f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, "
                    f"{speed_stats}, "
                    f"{gpu_info}"
                )
@@ -307,7 +307,7 @@
        """
        self.model.eval()
        with torch.no_grad():
-            pbar = tqdm(colour="red", desc=f"Training Epoch: {epoch + 1}", total=len(self.dataloader_val),
+            pbar = tqdm(colour="red", desc=f"rank: {self.local_rank}, Validation Epoch: {epoch + 1}", total=len(self.dataloader_val),
                        dynamic_ncols=True)
            speed_stats = {}
            time5 = time.perf_counter()
@@ -343,7 +343,7 @@
                        f"validation epoch: {epoch}/{self.max_epoch}, "
                        f"step: {batch_idx}/{len(self.dataloader_val)}, "
                        f"(loss: {loss.detach().cpu().item():.3f}), "
-                        f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}"
+                        f"{[(k, round(v.cpu().item(), 3)) for k, v in stats.items()]}, "
                        f"{speed_stats}, "
                    )
                    pbar.set_description(description)