From 10e37a721fdd2ecfd8e17f7213688927c29343a1 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: 星期四, 27 四月 2023 17:24:47 +0800
Subject: [PATCH] update
---
funasr/models/frontend/windowing.py | 10 +++-------
1 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/funasr/models/frontend/windowing.py b/funasr/models/frontend/windowing.py
index f7f1dc1..a526758 100644
--- a/funasr/models/frontend/windowing.py
+++ b/funasr/models/frontend/windowing.py
@@ -4,19 +4,18 @@
"""Sliding Window for raw audio input data."""
+from funasr.models.frontend.abs_frontend import AbsFrontend
import torch
from typeguard import check_argument_types
from typing import Tuple
-class SlidingWindow(torch.nn.Module):
+class SlidingWindow(AbsFrontend):
"""Sliding Window.
-
Provides a sliding window over a batched continuous raw audio tensor.
Optionally, provides padding (Currently not implemented).
Combine this module with a pre-encoder compatible with raw audio data,
for example Sinc convolutions.
-
Known issues:
Output length is calculated incorrectly if audio shorter than win_length.
WARNING: trailing values are discarded - padding not implemented yet.
@@ -32,7 +31,6 @@
fs=None,
):
"""Initialize.
-
Args:
win_length: Length of frame.
hop_length: Relative starting point of next frame.
@@ -52,11 +50,9 @@
self, input: torch.Tensor, input_lengths: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
"""Apply a sliding window on the input.
-
Args:
input: Input (B, T, C*D) or (B, T*C*D), with D=C=1.
input_lengths: Input lengths within batch.
-
Returns:
Tensor: Output with dimensions (B, T, C, D), with D=win_length.
Tensor: Output lengths within batch.
@@ -77,4 +73,4 @@
def output_size(self) -> int:
"""Return output length of feature dimension D, i.e. the window length."""
- return self.win_length
+ return self.win_length
\ No newline at end of file
--
Gitblit v1.9.1