| | |
| | | parts = line.strip().split() |
| | | if len(parts) < 2: |
| | | continue |
| | | text_dict[parts[0]] = " ".join(parts[1:]).lower() |
| | | text_dict[parts[0]] = " ".join(parts[1:]) |
| | | filter_count = 0 |
| | | with open(wav_file, "w") as f_wav, open(text_file, "w") as f_text: |
| | | for sample_name, wav_path in wav_dict.items(): |
| | |
| | | ["{}/{}/wav.scp".format(args.data_dir, args.valid_set), data_names[0], data_types[0]], |
| | | ["{}/{}/text".format(args.data_dir, args.valid_set), data_names[1], data_types[1]] |
| | | ] |
| | | if args.embed_path is not None: |
| | | args.train_data_path_and_name_and_type[0].append( |
| | | "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, "embeds", args.train_set, "embeds.scp"))) |
| | | args.valid_data_path_and_name_and_type[0].append( |
| | | "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, "embeds", args.dev_set, "embeds.scp"))) |
| | | else: |
| | | args.train_data_file = os.path.join(args.data_dir, args.train_set, "data.list") |
| | | args.valid_data_file = os.path.join(args.data_dir, args.valid_set, "data.list") |