From 4e7b67d756f1ab50b6bbe2d5af3af146aaf3c51c Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 05 Dec 2023 14:43:36 +0800
Subject: [PATCH] update docs; rename funasr.text imports to funasr.tokenizer and move whisper_utils imports
---
funasr/bin/asr_infer.py | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index ef3ed6e..a1cede1 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -34,8 +34,8 @@
from funasr.modules.scorers.ctc import CTCPrefixScorer
from funasr.modules.scorers.length_bonus import LengthBonus
from funasr.build_utils.build_asr_model import frontend_choices
-from funasr.text.build_tokenizer import build_tokenizer
-from funasr.text.token_id_converter import TokenIDConverter
+from funasr.tokenizer.build_tokenizer import build_tokenizer
+from funasr.tokenizer.token_id_converter import TokenIDConverter
from funasr.torch_utils.device_funcs import to_device
from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
@@ -44,9 +44,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2Text("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -251,9 +251,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2TextParaformer("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -625,9 +625,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2TextParaformerOnline("asr_config.yml", "asr.pth")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -876,9 +876,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2TextUniASR("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -1106,9 +1106,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2TextMFCCA("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -1605,7 +1605,6 @@
feats_lengths = to_device(feats_lengths, device=self.device)
enc_out, _, _ = self.asr_model.encoder(feats, feats_lengths)
-
nbest_hyps = self.beam_search(enc_out[0])
return nbest_hyps
@@ -1638,9 +1637,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2TextSAASR("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -1886,9 +1885,9 @@
"""Speech2Text class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> speech2text = Speech2Text("asr_config.yml", "asr.pb")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -1922,9 +1921,6 @@
):
from funasr.tasks.whisper import ASRTask
- from funasr.utils.whisper_utils.transcribe import transcribe
- from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
- from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
# 1. Build ASR model
scorers = {}
@@ -1987,6 +1983,10 @@
"""
+ from funasr.utils.whisper_utils.transcribe import transcribe
+ from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
+ from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
+
speech = speech[0]
speech = pad_or_trim(speech)
mel = log_mel_spectrogram(speech).to(self.device)
--
Gitblit v1.9.1