shixian.shi
2023-06-27 187a302be238d1e6a757871f77e42117ff221c64
funasr/datasets/large_datasets/dataset.py
@@ -202,14 +202,7 @@
    data_types = conf.get("data_types", "kaldi_ark,text")
    pre_hwfile = conf.get("pre_hwlist", None)
    pre_prob = conf.get("pre_prob", 0)  # unused yet
    hw_config = {"sample_rate": conf.get("sample_rate", 0.6),
                 "double_rate": conf.get("double_rate", 0.1),
                 "hotword_min_length": conf.get("hotword_min_length", 2),
                 "hotword_max_length": conf.get("hotword_max_length", 8),
                 "pre_prob": conf.get("pre_prob", 0.0)}
    # pre_prob = conf.get("pre_prob", 0)  # unused yet
    if pre_hwfile is not None:
        pre_hwlist = []
        with open(pre_hwfile, 'r') as fin:
@@ -218,6 +211,15 @@
    else:
        pre_hwlist = None
    hw_config = {"sample_rate": conf.get("sample_rate", 0.6),
                 "double_rate": conf.get("double_rate", 0.1),
                 "hotword_min_length": conf.get("hotword_min_length", 2),
                 "hotword_max_length": conf.get("hotword_max_length", 8),
                 "pre_prob": conf.get("pre_prob", 0.0),
                 "pre_hwlist": pre_hwlist}
    dataset = AudioDataset(scp_lists, 
                           data_names, 
                           data_types,