shixian.shi
2023-05-04 4bbc661aa58798dbb9df7b7b548704bc5c619590
funasr/datasets/large_datasets/utils/tokenize.py
@@ -58,6 +58,7 @@
    if 'hw_tag' in data:
        hotword_indxs = sample_hotword(length, **hw_config)
        data[hotword_indxs] = hotword_indxs
        del data['hw_tag']
    for i in range(length):
        x = text[i]
        if i == length-1 and "punc" in data and x.startswith("vad:"):