From adcee8828ef5d78b575043954deb662a35e318f7 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Mon, 30 Jan 2023 16:02:54 +0800
Subject: [PATCH] update the minimum size of audio
---
funasr/layers/stft.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/funasr/layers/stft.py b/funasr/layers/stft.py
index 21beaae..376b5a3 100644
--- a/funasr/layers/stft.py
+++ b/funasr/layers/stft.py
@@ -42,7 +42,8 @@
self.normalized = normalized
self.onesided = onesided
if window is not None and not hasattr(torch, f"{window}_window"):
- raise ValueError(f"{window} window is not implemented")
+ if window.lower() != "povey":
+ raise ValueError(f"{window} window is not implemented")
self.window = window
def extra_repr(self):
@@ -83,10 +84,14 @@
# output: (Batch, Freq, Frames, 2=real_imag)
# or (Batch, Channel, Freq, Frames, 2=real_imag)
if self.window is not None:
- window_func = getattr(torch, f"{self.window}_window")
- window = window_func(
- self.win_length, dtype=input.dtype, device=input.device
- )
+ if self.window.lower() == "povey":
+ window = torch.hann_window(self.win_length, periodic=False,
+ device=input.device, dtype=input.dtype).pow(0.85)
+ else:
+ window_func = getattr(torch, f"{self.window}_window")
+ window = window_func(
+ self.win_length, dtype=input.dtype, device=input.device
+ )
else:
window = None
--
Gitblit v1.9.1