Changed files:

- examples/README.md
- examples/aishell/branchformer/run.sh
- examples/aishell/conformer/run.sh
- examples/aishell/e_branchformer/run.sh
- examples/aishell/paraformer/run.sh
- examples/aishell/transformer/run.sh
examples/README.md
@@ -248,10 +248,10 @@
 export CUDA_VISIBLE_DEVICES="0,1"
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 1 --nproc_per_node ${gpu_num} \
+torchrun --nnodes 1 --nproc_per_node ${gpu_num} --master_port 12345 \
 ../../../funasr/bin/train.py ${train_args}
 ```
--nnodes represents the total number of participating nodes, while --nproc_per_node indicates the number of processes running on each node.
+--nnodes represents the total number of participating nodes, while --nproc_per_node indicates the number of processes running on each node. --master_port sets the rendezvous port (12345 here).

 ##### Multi-Machine Multi-GPU Training

@@ -260,7 +260,7 @@
 export CUDA_VISIBLE_DEVICES="0,1"
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
 ../../../funasr/bin/train.py ${train_args}
 ```
 On the worker node (assuming the IP is 192.168.1.2), you need to ensure that the MASTER_ADDR and MASTER_PORT environment variables are set to match those of the master node, and then run the same command:

@@ -269,7 +269,7 @@
 export CUDA_VISIBLE_DEVICES="0,1"
 gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr 192.168.1.1 --master_port 12345 \
 ../../../funasr/bin/train.py ${train_args}
 ```

examples/aishell/branchformer/run.sh
@@ -27,6 +27,8 @@
 tag="exp1"
 workspace=`pwd`
+
+master_port=12345

 . utils/parse_options.sh || exit 1;

 # Set bash to 'debug' mode, it will exit on :

@@ -115,6 +117,7 @@
 torchrun \
 --nnodes 1 \
 --nproc_per_node ${gpu_num} \
+--master_port ${master_port} \
 ../../../funasr/bin/train.py \
 --config-path "${workspace}/conf" \
 --config-name "${config}" \

examples/aishell/conformer/run.sh
@@ -27,6 +27,8 @@
 tag="exp1"
 workspace=`pwd`
+
+master_port=12345

 . utils/parse_options.sh || exit 1;

 # Set bash to 'debug' mode, it will exit on :

@@ -114,6 +116,7 @@
 torchrun \
 --nnodes 1 \
 --nproc_per_node ${gpu_num} \
+--master_port ${master_port} \
 ../../../funasr/bin/train.py \
 --config-path "${workspace}/conf" \
 --config-name "${config}" \

examples/aishell/e_branchformer/run.sh
@@ -27,6 +27,8 @@
 tag="exp1"
 workspace=`pwd`
+
+master_port=12345

 . utils/parse_options.sh || exit 1;

 # Set bash to 'debug' mode, it will exit on :

@@ -115,6 +117,7 @@
 torchrun \
 --nnodes 1 \
 --nproc_per_node ${gpu_num} \
+--master_port ${master_port} \
 ../../../funasr/bin/train.py \
 --config-path "${workspace}/conf" \
 --config-name "${config}" \

examples/aishell/paraformer/run.sh
@@ -27,6 +27,8 @@
 tag="exp1"
 workspace=`pwd`
+
+master_port=12345

 . utils/parse_options.sh || exit 1;

 # Set bash to 'debug' mode, it will exit on :

@@ -113,6 +115,7 @@
 torchrun \
 --nnodes 1 \
 --nproc_per_node ${gpu_num} \
+--master_port ${master_port} \
 ../../../funasr/bin/train.py \
 --config-path "${workspace}/conf" \
 --config-name "${config}" \

examples/aishell/transformer/run.sh
@@ -27,6 +27,8 @@
 tag="exp1"
 workspace=`pwd`
+
+master_port=12345

 . utils/parse_options.sh || exit 1;

 # Set bash to 'debug' mode, it will exit on :

@@ -115,6 +117,7 @@
 torchrun \
 --nnodes 1 \
 --nproc_per_node ${gpu_num} \
+--master_port ${master_port} \
 ../../../funasr/bin/train.py \
 --config-path "${workspace}/conf" \
 --config-name "${config}" \
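The `master_port=12345` default added to each run.sh is declared before `. utils/parse_options.sh`, so it can be overridden from the command line in the usual Kaldi-style way. A minimal sketch of that override pattern (the loop below is a simplified stand-in for what parse_options.sh does, not its actual implementation):

```shell
#!/bin/sh
# Default declared before option parsing, as in the patched run.sh scripts.
master_port=12345

# Simplified stand-in for utils/parse_options.sh: a --name value pair
# resets the shell variable of the same name (illustration only).
while [ $# -gt 0 ]; do
  case "$1" in
    --master_port) master_port="$2"; shift 2 ;;
    *) break ;;
  esac
done

echo "torchrun will use --master_port ${master_port}"
```

Invoked as `bash run.sh --master_port 12346`, two trainings on the same machine can then rendezvous on distinct ports instead of both colliding on the default.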
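The `gpu_num` line repeated in every README snippet simply counts the comma-separated device ids in CUDA_VISIBLE_DEVICES with awk, so `--nproc_per_node` launches one process per visible GPU. A self-contained check of that derivation:

```shell
#!/bin/sh
# Count comma-separated device ids, as the README snippets do.
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "launching ${gpu_num} processes (one per GPU)"   # gpu_num is 2 here
```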