From be871ad6d0cbfa1b4b0d61c7906d2eefc1e9d754 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Thu, 07 Mar 2024 12:31:51 +0800
Subject: [PATCH] Update CMakeLists.txt
---
funasr/models/ct_transformer/utils.py | 28 ++++++----------------------
1 file changed, 6 insertions(+), 22 deletions(-)
diff --git a/funasr/models/ct_transformer/utils.py b/funasr/models/ct_transformer/utils.py
index 917f2e0..01b1850 100644
--- a/funasr/models/ct_transformer/utils.py
+++ b/funasr/models/ct_transformer/utils.py
@@ -1,4 +1,10 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)
+
import re
+
def split_to_mini_sentence(words: list, word_limit: int = 20):
assert word_limit > 1
@@ -12,28 +18,6 @@
if length % word_limit > 0:
sentences.append(words[sentence_len * word_limit:])
return sentences
-
-
-# def split_words(text: str, **kwargs):
-# words = []
-# segs = text.split()
-# for seg in segs:
-# # There is no space in seg.
-# current_word = ""
-# for c in seg:
-# if len(c.encode()) == 1:
-# # This is an ASCII char.
-# current_word += c
-# else:
-# # This is a Chinese char.
-# if len(current_word) > 0:
-# words.append(current_word)
-# current_word = ""
-# words.append(c)
-# if len(current_word) > 0:
-# words.append(current_word)
-#
-# return words
def split_words(text: str, jieba_usr_dict=None, **kwargs):
if jieba_usr_dict:
--
Gitblit v1.9.1