| | |
| | | read from SI1279.PHN file -> "sil b r ih sil k s aa r er n aa l |
| | | sil t er n ih sil t ih v sil" """, |
| | | ) |
| | | parser.add_argument( |
| | | "--text_format", |
| | | default="text", |
| | | type=str, |
| | | help="text, jsonl", |
| | | ) |
| | | return parser |
| | | |
| | | |
| | |
| | | line = f.readline() |
| | | n = args.nchar |
| | | while line: |
| | | if args.text_format == "jsonl": |
| | | data = json.loads(line.strip()) |
| | | line = data["target"] |
| | | x = line.split() |
| | | print(" ".join(x[: args.skip_ncols]), end=" ") |
| | | a = " ".join(x[args.skip_ncols :]) |
| | |
| | | frontend=frontend, |
| | | tokenizer=None, |
| | | is_training=False, |
| | | **kwargs.get("dataset_conf") |
| | | **kwargs.get("dataset_conf"), |
| | | ) |
| | | |
| | | # dataloader |
| | |
| | | dataset_train, collate_fn=dataset_train.collator, **batch_sampler_train |
| | | ) |
| | | |
| | | iter_stop = int(kwargs.get("scale", 1.0) * len(dataloader_train)) |
| | | |
| | | total_frames = 0 |
| | | for batch_idx, batch in enumerate(dataloader_train): |
| | | if batch_idx >= iter_stop: |
| | | iter_stop = int(kwargs.get("scale", -1.0) * len(dataloader_train)) |
| | | log_step = iter_stop // 100 |
| | | if batch_idx % log_step == 0: |
| | | print(f"prcessed: {batch_idx}/{iter_stop}") |
| | | if batch_idx >= iter_stop and iter_stop > 0.0: |
| | | break |
| | | |
| | | fbank = batch["speech"].numpy()[0, :, :] |