shixian.shi
2023-06-27 1d1a33d5be06a9621ef835c430386dcd9832210d
funasr/datasets/large_datasets/utils/tokenize.py
@@ -54,6 +54,9 @@
    length = len(text)
    if 'hw_tag' in data:
        if hw_config['pre_hwlist'] is not None and hw_config['pre_prob'] > 0:
            # enable preset hotword detect in sampling
            import pdb; pdb.set_trace()
        hotword_indxs = sample_hotword(length, **hw_config)
        data['hotword_indxs'] = hotword_indxs
        del data['hw_tag']