python/FunASR-XL.git

			@@ -19,7 +19,7 @@


			from funasr.models.paraformer.search import Hypothesis
			-from funasr.models.sense_voice.utils.ctc_alignment import ctc_forced_align
			+from .utils.ctc_alignment import ctc_forced_align


			class SinusoidalPositionEncoder(torch.nn.Module):
			@@ -916,6 +916,7 @@

			if output_timestamp:
			from itertools import groupby

			timestamp = []
			tokens = tokenizer.text2tokens(text)[4:]
			logits_speech = self.ctc.softmax(encoder_out)[i, 4:encoder_out_lens[i].item(), :]
			@@ -952,7 +953,7 @@
			timestamp_new = []
			for i, t in enumerate(timestamp):
			word, start, end = t
			-if word == '▁':
			+if word == "▁":
			continue
			if i == 0:
			# timestamp_new.append([word, start, end])
			@@ -965,6 +966,7 @@
			# timestamp_new[-1][0] += word
			timestamp_new[-1][1] = int(end*1000)
			return timestamp_new

			def export(self, **kwargs):
			from export_meta import export_rebuild_model

			@@ -974,4 +976,3 @@
			return models

			return results, meta_data

	funasr/models/sense_voice/model.py	7 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/models/sense_voice/utils/__init__.py	补丁 \| 查看 \| 原始文档 \| blame \| 历史