hnluo
2023-03-17 b78de3a56da2380568f8fb106576fde71b2bd299
Merge pull request #256 from alibaba-damo-academy/dev_wjm

Dev wjm
3个文件已修改
14 ■■■■ 已修改文件
funasr/bin/asr_inference_mfcca.py 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/datasets/preprocessor.py 10 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
tests/test_asr_inference_pipeline.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/bin/asr_inference_mfcca.py
@@ -41,8 +41,6 @@
from funasr.utils import asr_utils, wav_utils, postprocess_utils
import pdb
header_colors = '\033[95m'
end_colors = '\033[0m'
global_asr_language: str = 'zh-cn'
global_sample_rate: Union[int, Dict[Any, int]] = {
funasr/datasets/preprocessor.py
@@ -47,15 +47,11 @@
def seg_tokenize(txt, seg_dict):
    """Tokenize *txt* through a segmentation dictionary.

    Each element of ``txt`` (a character/word iterable) that looks like a
    CJK character, ASCII letter, or digit is mapped through ``seg_dict``;
    in-vocabulary items are replaced by their dictionary entry, others by
    ``"<unk>"``. Items that do not match the pattern (punctuation, symbols)
    are silently dropped.

    NOTE(review): the original text here was a collapsed diff that looked
    up ``seg_dict`` twice (emitting matched words twice) and had an
    unreachable append after ``continue``; this is the coherent single-pass
    reconstruction.

    :param txt: iterable of tokens (e.g. a string iterated per character)
    :param seg_dict: mapping from token -> segmented replacement string
    :return: list of output tokens (``str.split`` of the joined result)
    """
    out_txt = ""
    # First char must be a CJK ideograph, latin letter, or digit.
    pattern = re.compile(r"([\u4E00-\u9FA5A-Za-z0-9])")
    for word in txt:
        if not pattern.match(word):
            # punctuation / symbols are skipped entirely
            continue
        if word in seg_dict:
            out_txt += seg_dict[word] + " "
        else:
            out_txt += "<unk>" + " "
    # strip trailing space, split into the final token list ([] for empty)
    return out_txt.strip().split()
def seg_tokenize_wo_pattern(txt, seg_dict):
tests/test_asr_inference_pipeline.py
@@ -452,7 +452,7 @@
    def test_uniasr_2pass_zhcn_16k_common_vocab8358_offline(self):
        inference_pipeline = pipeline(
            task=Tasks.auto_speech_recognition,
-            model='damo/speech_UniASauto_speech_recognitionR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
+            model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
        rec_result = inference_pipeline(
            audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
            param_dict={"decoding_model": "offline"})