| | |
| | | import argparse |
| | | import logging |
| | | import os |
| | | import sys |
| | | import json |
| | | from pathlib import Path |
| | |
| | | from funasr.models.frontend.wav_frontend import WavFrontend |
| | | from funasr.bin.vad_inference import Speech2VadSegment |
| | | |
| | | header_colors = '\033[95m' |
| | | end_colors = '\033[0m' |
| | | |
| | | global_asr_language: str = 'zh-cn' |
| | | global_sample_rate: Union[int, Dict[Any, int]] = { |
| | | 'audio_fs': 16000, |
| | | 'model_fs': 16000 |
| | | } |
| | | |
| | | |
| | | class Speech2VadSegmentOnline(Speech2VadSegment): |
| | |
| | | } |
| | | # a. To device |
| | | batch = to_device(batch, device=self.device) |
| | | segments, in_cache = self.vad_model(**batch) |
| | | segments, in_cache = self.vad_model.forward_online(**batch) |
| | | # in_cache.update(batch['in_cache']) |
| | | # in_cache = {key: value for key, value in batch['in_cache'].items()} |
| | | return fbanks, segments, in_cache |
| | |
| | | # param_dict['in_cache'] = batch['in_cache'] |
| | | if results: |
| | | for i, _ in enumerate(keys): |
| | | results[i] = json.dumps(results[i]) |
| | | item = {'key': keys[i], 'value': results[i]} |
| | | vad_results.append(item) |
| | | if writer is not None: |
| | | results[i] = json.loads(results[i]) |
| | | ibest_writer["text"][keys[i]] = "{}".format(results[i]) |
| | | if results[i]: |
| | | if "MODELSCOPE_ENVIRONMENT" in os.environ and os.environ["MODELSCOPE_ENVIRONMENT"] == "eas": |
| | | results[i] = json.dumps(results[i]) |
| | | item = {'key': keys[i], 'value': results[i]} |
| | | vad_results.append(item) |
| | | if writer is not None: |
| | | results[i] = json.loads(results[i]) |
| | | ibest_writer["text"][keys[i]] = "{}".format(results[i]) |
| | | |
| | | return vad_results |
| | | |