jmwang66
2024-02-29 2acd24f0158b2c86d2fb4e6f1134b67a1150500e
funasr/auto/auto_model.py
@@ -95,7 +95,7 @@
class AutoModel:
    
    def __init__(self, **kwargs):
        if not kwargs.get("disable_log", False):
        if not kwargs.get("disable_log", True):
            tables.print()
        
        model, kwargs = self.build_model(**kwargs)
@@ -162,8 +162,10 @@
            tokenizer_class = tables.tokenizer_classes.get(tokenizer)
            tokenizer = tokenizer_class(**kwargs["tokenizer_conf"])
            kwargs["tokenizer"] = tokenizer
            kwargs["token_list"] = tokenizer.token_list
            vocab_size = len(tokenizer.token_list)
            kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
            kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
            vocab_size = len(kwargs["token_list"])
        else:
            vocab_size = -1
        
@@ -184,15 +186,18 @@
        # init_param
        init_param = kwargs.get("init_param", None)
        if init_param is not None:
            logging.info(f"Loading pretrained params from {init_param}")
            load_pretrained_model(
                model=model,
                path=init_param,
                ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
                oss_bucket=kwargs.get("oss_bucket", None),
                scope_map=kwargs.get("scope_map", None),
                excludes=kwargs.get("excludes", None),
            )
            if os.path.exists(init_param):
                logging.info(f"Loading pretrained params from {init_param}")
                load_pretrained_model(
                    model=model,
                    path=init_param,
                    ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
                    oss_bucket=kwargs.get("oss_bucket", None),
                    scope_map=kwargs.get("scope_map", []),
                    excludes=kwargs.get("excludes", None),
                )
            else:
                print(f"error, init_param does not exist!: {init_param}")
        
        return model, kwargs
    
@@ -387,7 +392,8 @@
            # step.3 compute punc model
            if self.punc_model is not None:
                if not len(result["text"]):
                    result['raw_text'] = ''
                    if return_raw_text:
                        result['raw_text'] = ''
                else:
                    self.punc_kwargs.update(cfg)
                    punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
@@ -429,10 +435,13 @@
                distribute_spk(sentence_list, sv_output)
                result['sentence_info'] = sentence_list
            elif kwargs.get("sentence_timestamp", False):
                sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
                                                   result['timestamp'],
                                                   raw_text,
                                                   return_raw_text=return_raw_text)
                if not len(result['text']):
                    sentence_list = []
                else:
                    sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
                                                       result['timestamp'],
                                                       raw_text,
                                                       return_raw_text=return_raw_text)
                result['sentence_info'] = sentence_list
            if "spk_embedding" in result: del result['spk_embedding']