zhifu gao
2024-04-17 eaf9dda9e4d970af3d09db695e9e10c83ef94e25
funasr/datasets/audio_datasets/index_ds.py
@@ -92,7 +92,7 @@
            for line in fin:
                data = json.loads(line.strip())
                if "text" in data:  # for sft
                    self.contents.append(data['text'])
                    contents.append(data['text'])
                if "source" in data:  # for speech lab pretrain
                    prompt = data.get("prompt", "<ASR>")
                    source = data["source"]
@@ -101,13 +101,20 @@
                    target_len = data.get("target_len", 0)
                    if "aishell" in source:
                        target = target.replace(" ", "")
                    contents.append({"source": source,
                                     "prompt": prompt,
                                     "target": target,
                                     "source_len": source_len,
                                     "target_len": target_len,
                                     }
                                    )
                    contents_i = {"source": source,
                                 "prompt": prompt,
                                 "target": target,
                                 "source_len": source_len,
                                 "target_len": target_len,
                                 }
                    text_language = data.get("text_language", None)
                    if text_language is not None:
                        contents_i["text_language"] = text_language
                    audio_language = data.get("audio_language", None)
                    if audio_language is not None:
                        contents_i["audio_language"] = audio_language
                    contents.append(contents_i)
        self.contents = contents