| | |
| | | vad_model = kwargs.get("vad_model", None) |
| | | vad_kwargs = kwargs.get("vad_model_revision", None) |
| | | if vad_model is not None: |
| | | print("build vad model") |
| | | logging.info("Building VAD model.") |
| | | vad_kwargs = {"model": vad_model, "model_revision": vad_kwargs} |
| | | vad_model, vad_kwargs = self.build_model(**vad_kwargs) |
| | | |
| | |
| | | punc_model = kwargs.get("punc_model", None) |
| | | punc_kwargs = kwargs.get("punc_model_revision", None) |
| | | if punc_model is not None: |
| | | logging.info("Building punc model.") |
| | | punc_kwargs = {"model": punc_model, "model_revision": punc_kwargs} |
| | | punc_model, punc_kwargs = self.build_model(**punc_kwargs) |
| | | |
| | |
| | | spk_model = kwargs.get("spk_model", None) |
| | | spk_kwargs = kwargs.get("spk_model_revision", None) |
| | | if spk_model is not None: |
| | | logging.info("Building SPK model.") |
| | | spk_kwargs = {"model": spk_model, "model_revision": spk_kwargs} |
| | | spk_model, spk_kwargs = self.build_model(**spk_kwargs) |
| | | self.cb_model = ClusterBackend() |
| | |
| | | if spk_mode not in ["default", "vad_segment", "punc_segment"]: |
| | | logging.error("spk_mode should be one of default, vad_segment and punc_segment.") |
| | | self.spk_mode = spk_mode |
| | | self.preset_spk_num = kwargs.get("preset_spk_num", None) |
| | | if self.preset_spk_num: |
| | | logging.warning("Using preset speaker number: {}".format(self.preset_spk_num)) |
| | | logging.warning("Many to print when using speaker model...") |
| | | |
| | | self.kwargs = kwargs |
| | |
| | | self.punc_kwargs = punc_kwargs |
| | | self.spk_model = spk_model |
| | | self.spk_kwargs = spk_kwargs |
| | | self.model_path = kwargs["model_path"] |
| | | |
| | | |
| | | def build_model(self, **kwargs): |
| | |
| | | if self.spk_model is not None: |
| | | all_segments = sorted(all_segments, key=lambda x: x[0]) |
| | | spk_embedding = result['spk_embedding'] |
| | | labels = self.cb_model(spk_embedding) |
| | | labels = self.cb_model(spk_embedding, oracle_num=self.preset_spk_num) |
| | | del result['spk_embedding'] |
| | | sv_output = postprocess(all_segments, None, labels, spk_embedding) |
| | | sv_output = postprocess(all_segments, None, labels, spk_embedding.cpu()) |
| | | if self.spk_mode == 'vad_segment': |
| | | sentence_list = [] |
| | | for res, vadsegment in zip(restored_data, vadsegments): |