funasr/datasets/large_datasets/utils/tokenize.py
@@ -58,6 +58,7 @@ if 'hw_tag' in data: hotword_indxs = sample_hotword(length, **hw_config) data[hotword_indxs] = hotword_indxs del data['hw_tag'] for i in range(length): x = text[i] if i == length-1 and "punc" in data and x.startswith("vad:"):