From acb9a0fec8d8a4dabeedcbb8e08c26f66d7083f0 Mon Sep 17 00:00:00 2001
From: haoneng.lhn <haoneng.lhn@alibaba-inc.com>
Date: Fri, 08 Dec 2023 16:19:00 +0800
Subject: [PATCH] rwkv_attention: apply exponential time decay and disable WKV CUDA kernel loading
---
funasr/modules/rwkv_attention.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/funasr/modules/rwkv_attention.py b/funasr/modules/rwkv_attention.py
index f0c7da3..5384fb9 100644
--- a/funasr/modules/rwkv_attention.py
+++ b/funasr/modules/rwkv_attention.py
@@ -445,7 +445,7 @@
"""
num_state, den_state, max_state = state
-
+ time_decay = -torch.exp(time_decay)
max_for_output = torch.maximum(max_state, (time_first + key))
e1 = torch.exp(max_state - max_for_output)
@@ -495,7 +495,7 @@
dropout_rate,
num_blocks
)
- load_decoder_wkv_kernel(context_size)
+ # load_decoder_wkv_kernel(context_size)
def forward(
self,
@@ -577,7 +577,7 @@
dropout_rate,
num_blocks
)
- load_encoder_wkv_kernel(context_size)
+ # load_encoder_wkv_kernel(context_size)
def forward(
self,
--
Gitblit v1.9.1