| | |
| | | ++init_param=${file_dir}/model.pb \ |
| | | ++tokenizer_conf.token_list=${file_dir}/tokens.txt \ |
| | | ++frontend_conf.cmvn_file=${file_dir}/am.mvn \ |
| | | ++input=${file_dir}/wav.scp \ |
| | | ++input=${file_dir}/ocr_text \ |
| | | ++input=[${file_dir}/wav.scp,${file_dir}/ocr_text] \ |
| | | +data_type='["sound", "text"]' \ |
| | | ++tokenizer_conf.bpemodel=${file_dir}/bpe.model \ |
| | | ++output_dir="./outputs/debug" \ |
| | |
| | | |
| | | # build model |
| | | model_class = tables.model_classes.get(kwargs["model"]) |
| | | pdb.set_trace() |
| | | model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=vocab_size) |
| | | pdb.set_trace() |
| | | model.to(device) |
| | | |
| | | # init_param |
| | | init_param = kwargs.get("init_param", None) |
| | | pdb.set_trace() |
| | | if init_param is not None: |
| | | logging.info(f"Loading pretrained params from {init_param}") |
| | | load_pretrained_model( |
| | |
| | | |
| | | obj = model |
| | | dst_state = obj.state_dict() |
| | | # import pdb; |
| | | # pdb.set_trace() |
| | | print(f"ckpt: {path}") |
| | | pdb.set_trace() |
| | | |
| | | if oss_bucket is None: |
| | | src_state = torch.load(path, map_location=map_location) |
| | | else: |
| | | buffer = BytesIO(oss_bucket.get_object(path).read()) |
| | | src_state = torch.load(buffer, map_location=map_location) |
| | | pdb.set_trace() |
| | | |
| | | if "state_dict" in src_state: |
| | | src_state = src_state["state_dict"] |
| | | pdb.set_trace() |
| | | |
| | | for k in dst_state.keys(): |
| | | if not k.startswith("module.") and "module." + k in src_state.keys(): |
| | | k_ddp = "module." + k |
| | |
| | | dst_state[k] = src_state[k_ddp] |
| | | else: |
| | | print(f"Miss key in ckpt: model: {k}, ckpt: {k_ddp}") |
| | | pdb.set_trace() |
| | | flag = obj.load_state_dict(dst_state, strict=True) |
| | | # print(flag) |
| | | |