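            # resolve the vocabulary: prefer an explicit token_list attribute,
            # then tokenizer.get_vocab(); fall back to get_vocab_size() when
            # only the vocabulary size is exposed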
            kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
            kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
            vocab_size = len(kwargs["token_list"]) if kwargs["token_list"] is not None else -1
            if vocab_size == -1 and hasattr(tokenizer, "get_vocab_size"):
                vocab_size = tokenizer.get_vocab_size()
        else:
            vocab_size = -1
        kwargs["tokenizer"] = tokenizer

        else:
            # the init_param checkpoint path was not found on disk
            print(f"error, init_param does not exist!: {init_param}")

        # fp16
        if kwargs.get("fp16", False):
            model.to(torch.float16)
        return model, kwargs

    def __call__(self, *args, **cfg):
        # restore the stored default kwargs (assumed to be kept on self) and
        # apply the per-call overrides before reading options from them below
        kwargs = self.kwargs
        kwargs.update(cfg)

        return_raw_text = kwargs.get("return_raw_text", False)
        # step.3 compute punc model
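        # punctuation restoration: re-insert punctuation into the recognized text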
        if self.punc_model is not None:
            if not len(result["text"].strip()):
                # empty recognition result: nothing to punctuate
                if return_raw_text:
                    result["raw_text"] = ""
            else:
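                # non-empty text: apply the punc model to restore punctuation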