| | |
| | | |
| | | for line in lines: |
| | | |
| | | jsonl_file_out_f.write(line) |
| | | jsonl_file_out_f.write(line + "\n") |
| | | jsonl_file_out_f.flush() |
| | | |
| | | jsonl_file_out_f.close() |
| | |
| | | sample_num = len(waveform) |
| | | source_len = int(sample_num / 16000 * 1000 / 10) |
| | | source_len_old = data["source_len"] |
| | | if source_len_old != source_len: |
| | | print(f"wav: {wav_path}, old: {source_len_old}, new: {source_len}") |
| | | if (source_len_old - source_len) > 100 or (source_len - source_len_old) > 100: |
| | | print(f"old: {source_len_old}, new: {source_len}, wav: {wav_path}") |
| | | data["source_len"] = source_len |
| | | data["source"] = wav_path |
| | | jsonl_line = json.dumps(data, ensure_ascii=False) |
| | | lines[i] = jsonl_line |
| | | |