| | |
| | | path=init_param, |
| | | ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False), |
| | | oss_bucket=kwargs.get("oss_bucket", None), |
| | | scope_map=kwargs.get("scope_map", "module.,None"), |
| | | scope_map=kwargs.get("scope_map", []), |
| | | excludes=kwargs.get("excludes", None), |
| | | ) |
| | | else: |
| | |
| | | path=p, |
| | | ignore_init_mismatch=kwargs.get("ignore_init_mismatch", True), |
| | | oss_bucket=kwargs.get("oss_bucket", None), |
| | | scope_map=kwargs.get("scope_map", "module.,none"), |
| | | scope_map=kwargs.get("scope_map", []), |
| | | excludes=kwargs.get("excludes", None), |
| | | ) |
| | | else: |
| | |
| | | model_outputs = self.llm(inputs_embeds=inputs_embeds, attention_mask=attention_mask, labels=None) |
| | | preds = torch.argmax(model_outputs.logits, -1) |
| | | text = tokenizer.batch_decode(preds, add_special_tokens=False, skip_special_tokens=True) |
| | | text = text.split(': "\n')[-1] |
| | | text = text.split(': \n')[-1] |
| | | # preds = torch.argmax(model_outputs.logits, -1) |
| | | |
| | | ibest_writer = None |
| | |
| | | ignore_init_mismatch: bool=True, |
| | | map_location: str = "cpu", |
| | | oss_bucket=None, |
| | | scope_map="module.:none", |
| | | scope_map=[], |
| | | excludes=None, |
| | | ignore_mismatch=False, |
| | | **kwargs, |
| | |
| | | |
| | | if isinstance(scope_map, str): |
| | | scope_map = scope_map.split(",") |
| | | scope_map += ["module.", "None"] |
| | | |
| | | for k in dst_state.keys(): |
| | | |
| | |
| | | src_prefix = scope_map[i] if scope_map[i].lower() != "none" else "" |
| | | dst_prefix = scope_map[i+1] if scope_map[i+1].lower() != "none" else "" |
| | | |
| | | if k.startswith(dst_prefix) and k.replace(dst_prefix, src_prefix) in src_state.keys(): |
| | | k_src = k.replace(dst_prefix, src_prefix) |
| | | if dst_prefix == "" and (src_prefix + k) in src_state.keys(): |
| | | k_src = src_prefix + k |
| | | print(f"init param, map: {k} from {k_src} in ckpt") |
| | | elif k.startswith(dst_prefix) and k.replace(dst_prefix, src_prefix, 1) in src_state.keys(): |
| | | k_src = k.replace(dst_prefix, src_prefix, 1) |
| | | print(f"init param, map: {k} from {k_src} in ckpt") |
| | | |
| | | if k_src in src_state.keys(): |