| | |
| | | result[k] = restored_data[j][k] |
| | | else: |
| | | result[k] = torch.cat([result[k], restored_data[j][k]], dim=0) |
| | | elif k == 'text': |
| | | elif k == 'raw_text': |
| | | if k not in result: |
| | | result[k] = restored_data[j][k] |
| | | else: |
| | |
| | | if self.spk_model is not None: |
| | | all_segments = sorted(all_segments, key=lambda x: x[0]) |
| | | spk_embedding = result['spk_embedding'] |
| | | labels = self.cb_model(spk_embedding, oracle_num=self.preset_spk_num) |
| | | labels = self.cb_model(spk_embedding.cpu(), oracle_num=self.preset_spk_num) |
| | | del result['spk_embedding'] |
| | | sv_output = postprocess(all_segments, None, labels, spk_embedding.cpu()) |
| | | if self.spk_mode == 'vad_segment': |
| | |
| | | for res, vadsegment in zip(restored_data, vadsegments): |
| | | sentence_list.append({"start": vadsegment[0],\ |
| | | "end": vadsegment[1], |
| | | "sentence": res['text'], |
| | | "sentence": res['raw_text'], |
| | | "timestamp": res['timestamp']}) |
| | | else: # punc_segment |
| | | sentence_list = timestamp_sentence(punc_res[0]['punc_array'], \ |
| | | result['timestamp'], \ |
| | | result['text']) |
| | | result['raw_text']) |
| | | distribute_spk(sentence_list, sv_output) |
| | | result['sentence_info'] = sentence_list |
| | | |