zhifu gao
2023-03-23 4e44c9f46e550eab4ec6b70c099dcdae44eb9d61
funasr/bin/vad_inference_online.py
@@ -1,5 +1,6 @@
import argparse
import logging
import os
import sys
import json
from pathlib import Path
@@ -29,14 +30,7 @@
from funasr.models.frontend.wav_frontend import WavFrontend
from funasr.bin.vad_inference import Speech2VadSegment
# ANSI SGR escape sequences: '\033[95m' selects the bright-magenta foreground
# and '\033[0m' resets all text attributes. Presumably used to colorize
# console headers -- no use is visible in this excerpt; confirm with callers.
header_colors = '\033[95m'
end_colors = '\033[0m'
# Module-level default language tag. How it is consumed is not visible in
# this excerpt.
global_asr_language: str = 'zh-cn'
# Module-level sample-rate settings. The annotation allows either a single
# int or a mapping; the default is a mapping with two entries, both 16000.
# NOTE(review): values are presumably in Hz, with 'audio_fs' the input audio
# rate and 'model_fs' the rate the model expects -- TODO confirm.
global_sample_rate: Union[int, Dict[Any, int]] = {
    'audio_fs': 16000,
    'model_fs': 16000
}
class Speech2VadSegmentOnline(Speech2VadSegment):
@@ -96,7 +90,7 @@
            }
            # a. To device
            batch = to_device(batch, device=self.device)
            segments, in_cache = self.vad_model(**batch)
            segments, in_cache = self.vad_model.forward_online(**batch)
            # in_cache.update(batch['in_cache'])
            # in_cache = {key: value for key, value in batch['in_cache'].items()}
        return fbanks, segments, in_cache
@@ -236,12 +230,14 @@
            # param_dict['in_cache'] = batch['in_cache']
            if results:
                for i, _ in enumerate(keys):
                    results[i] = json.dumps(results[i])
                    item = {'key': keys[i], 'value': results[i]}
                    vad_results.append(item)
                    if writer is not None:
                        results[i] = json.loads(results[i])
                        ibest_writer["text"][keys[i]] = "{}".format(results[i])
                    if results[i]:
                        if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas":
                            results[i] = json.dumps(results[i])
                        item = {'key': keys[i], 'value': results[i]}
                        vad_results.append(item)
                        if writer is not None:
                            results[i] = json.loads(results[i])
                            ibest_writer["text"][keys[i]] = "{}".format(results[i])
        return vad_results