From 98376eaab4f9d15fd7dbe862e870fcac21749668 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 30 四月 2024 11:13:51 +0800
Subject: [PATCH] batch
---
examples/aishell/conformer/run.sh | 3 +++
examples/README.md | 8 ++++----
examples/aishell/paraformer/run.sh | 3 +++
examples/aishell/branchformer/run.sh | 3 +++
examples/aishell/e_branchformer/run.sh | 3 +++
examples/aishell/transformer/run.sh | 3 +++
6 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/examples/README.md b/examples/README.md
index f87d5fa..0191a2d 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -248,10 +248,10 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
+torchrun --nnodes 1 --nproc_per_node ${gpu_num} --master_port 12345 \
../../../funasr/bin/train.py ${train_args}
```
---nnodes represents the total number of participating nodes, while --nproc_per_node indicates the number of processes running on each node.
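+--nnodes represents the total number of participating nodes, while --nproc_per_node indicates the number of processes running on each node. --master_port specifies the port on the master node used for communication between the training processes (12345 in this example); choose a different free port if it is already in use, for instance when launching several training jobs on the same machine.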
##### Multi-Machine Multi-GPU Training
@@ -260,7 +260,7 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
../../../funasr/bin/train.py ${train_args}
```
On the worker node (assuming the IP is 192.168.1.2), you need to ensure that the MASTER_ADDR and MASTER_PORT environment variables are set to match those of the master node, and then run the same command:
@@ -269,7 +269,7 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
../../../funasr/bin/train.py ${train_args}
```
diff --git a/examples/aishell/branchformer/run.sh b/examples/aishell/branchformer/run.sh
index 918aa9b..5b64954 100755
--- a/examples/aishell/branchformer/run.sh
+++ b/examples/aishell/branchformer/run.sh
@@ -27,6 +27,8 @@
tag="exp1"
workspace=`pwd`
+master_port=12345
+
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
@@ -115,6 +117,7 @@
torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
+ --master_port ${master_port} \
../../../funasr/bin/train.py \
--config-path "${workspace}/conf" \
--config-name "${config}" \
diff --git a/examples/aishell/conformer/run.sh b/examples/aishell/conformer/run.sh
index ba8b43c..0c8ab50 100755
--- a/examples/aishell/conformer/run.sh
+++ b/examples/aishell/conformer/run.sh
@@ -27,6 +27,8 @@
tag="exp1"
workspace=`pwd`
+master_port=12345
+
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
@@ -114,6 +116,7 @@
torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
+ --master_port ${master_port} \
../../../funasr/bin/train.py \
--config-path "${workspace}/conf" \
--config-name "${config}" \
diff --git a/examples/aishell/e_branchformer/run.sh b/examples/aishell/e_branchformer/run.sh
index be18599..452ec80 100755
--- a/examples/aishell/e_branchformer/run.sh
+++ b/examples/aishell/e_branchformer/run.sh
@@ -27,6 +27,8 @@
tag="exp1"
workspace=`pwd`
+master_port=12345
+
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
@@ -115,6 +117,7 @@
torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
+ --master_port ${master_port} \
../../../funasr/bin/train.py \
--config-path "${workspace}/conf" \
--config-name "${config}" \
diff --git a/examples/aishell/paraformer/run.sh b/examples/aishell/paraformer/run.sh
index a957b93..ffef61e 100755
--- a/examples/aishell/paraformer/run.sh
+++ b/examples/aishell/paraformer/run.sh
@@ -27,6 +27,8 @@
tag="exp1"
workspace=`pwd`
+master_port=12345
+
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
@@ -113,6 +115,7 @@
torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
+ --master_port ${master_port} \
../../../funasr/bin/train.py \
--config-path "${workspace}/conf" \
--config-name "${config}" \
diff --git a/examples/aishell/transformer/run.sh b/examples/aishell/transformer/run.sh
index 98c2829..3fb8465 100755
--- a/examples/aishell/transformer/run.sh
+++ b/examples/aishell/transformer/run.sh
@@ -27,6 +27,8 @@
tag="exp1"
workspace=`pwd`
+master_port=12345
+
. utils/parse_options.sh || exit 1;
# Set bash to 'debug' mode, it will exit on :
@@ -115,6 +117,7 @@
torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
+ --master_port ${master_port} \
../../../funasr/bin/train.py \
--config-path "${workspace}/conf" \
--config-name "${config}" \
--
Gitblit v1.9.1