From d80ac2fd2df4e7fb8a28acfa512bb11472b5cc99 Mon Sep 17 00:00:00 2001
From: liugz18 <57401541+liugz18@users.noreply.github.com>
Date: Thu, 18 Jul 2024 21:34:55 +0800
Subject: [PATCH] Rename 'res' in line 514 to avoid a naming conflict with line 365
---
funasr/datasets/audio_datasets/jsonl2scp.py | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/funasr/datasets/audio_datasets/jsonl2scp.py b/funasr/datasets/audio_datasets/jsonl2scp.py
index 9a2b023..3ce96ca 100644
--- a/funasr/datasets/audio_datasets/jsonl2scp.py
+++ b/funasr/datasets/audio_datasets/jsonl2scp.py
@@ -9,15 +9,14 @@
import torch.distributed as dist
-
def gen_scp_from_jsonl(jsonl_file, data_type_list, wav_scp_file, text_file):
wav_f = open(wav_scp_file, "w")
text_f = open(text_file, "w")
- with open(jsonl_file, encoding='utf-8') as fin:
+ with open(jsonl_file, encoding="utf-8") as fin:
for line in fin:
data = json.loads(line.strip())
-
+
prompt = data.get("prompt", "<ASR>")
source = data[data_type_list[0]]
target = data[data_type_list[1]]
@@ -33,21 +32,26 @@
wav_f.close()
text_f.close()
-
-
-
+
+
@hydra.main(config_name=None, version_base=None)
def main_hydra(cfg: DictConfig):
-
- kwargs = OmegaConf.to_container(cfg, resolve=True)
- scp_file_list = kwargs.get("scp_file_list", ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"))
+ kwargs = OmegaConf.to_container(cfg, resolve=True)
+ print(kwargs)
+
+ scp_file_list = kwargs.get(
+ "scp_file_list",
+ ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"),
+ )
if isinstance(scp_file_list, str):
scp_file_list = eval(scp_file_list)
data_type_list = kwargs.get("data_type_list", ("source", "target"))
- jsonl_file = kwargs.get("jsonl_file_in", "/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl")
+ jsonl_file = kwargs.get(
+ "jsonl_file_in", "/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl"
+ )
gen_scp_from_jsonl(jsonl_file, data_type_list, *scp_file_list)
-
+
"""
python -m funasr.datasets.audio_datasets.json2scp \
@@ -58,5 +62,3 @@
if __name__ == "__main__":
main_hydra()
-
-
\ No newline at end of file
--
Gitblit v1.9.1