From adcee8828ef5d78b575043954deb662a35e318f7 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Mon, 30 Jan 2023 16:02:54 +0800
Subject: [PATCH] update the minimum size of audio

---
 funasr/layers/stft.py |   15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/funasr/layers/stft.py b/funasr/layers/stft.py
index 21beaae..376b5a3 100644
--- a/funasr/layers/stft.py
+++ b/funasr/layers/stft.py
@@ -42,7 +42,8 @@
         self.normalized = normalized
         self.onesided = onesided
         if window is not None and not hasattr(torch, f"{window}_window"):
-            raise ValueError(f"{window} window is not implemented")
+            if window.lower() != "povey":
+                raise ValueError(f"{window} window is not implemented")
         self.window = window
 
     def extra_repr(self):
@@ -83,10 +84,14 @@
         # output: (Batch, Freq, Frames, 2=real_imag)
         # or (Batch, Channel, Freq, Frames, 2=real_imag)
         if self.window is not None:
-            window_func = getattr(torch, f"{self.window}_window")
-            window = window_func(
-                self.win_length, dtype=input.dtype, device=input.device
-            )
+            if self.window.lower() == "povey":
+                window = torch.hann_window(self.win_length, periodic=False,
+                                           device=input.device, dtype=input.dtype).pow(0.85)
+            else:
+                window_func = getattr(torch, f"{self.window}_window")
+                window = window_func(
+                    self.win_length, dtype=input.dtype, device=input.device
+                )
         else:
             window = None
 

--
Gitblit v1.9.1