| | |
| | | |
| | | #### python版本示例 |
| | | |
| | | 支持实时流式语音识别,并且会用非流式模型进行纠错,输出文本带有标点。目前只支持单个client,如需多并发请参考下方c++版本服务部署SDK
| | | |
| | | ##### 服务端部署 |
| | | ```shell |
| | |
| | | ``` |
| | | 更多例子可以参考([点击此处](https://alibaba-damo-academy.github.io/FunASR/en/runtime/websocket_python.html#id2)) |
| | | |
| | | <a name="cpp版本示例"></a> |
| | | #### c++版本示例 |
| | | |
| | | 目前已支持离线文件转写服务(CPU),支持上百路并发请求 |
| | |
| | | <a name="联系我们"></a> |
| | | ## 联系我们 |
| | | |
| | | 如果您在使用中遇到困难,可以通过以下方式联系我们
| | | |
| | | - 邮件: [funasr@list.alibaba-inc.com](mailto:funasr@list.alibaba-inc.com)
| | | |
| | | | 钉钉群 | 微信 | |
| | | |:---------------------------------------------------------------------:|:-----------------------------------------------------:| |
| | |
| | | batch_size_token_ms = batch_size_token*60 |
| | | if speech2text.device == "cpu": |
| | | batch_size_token_ms = 0 |
| | | if len(sorted_data) > 0 and len(sorted_data[0]) > 0:
| | | batch_size_token_ms = max(batch_size_token_ms, sorted_data[0][0][1] - sorted_data[0][0][0])
| | | |
| | | batch_size_token_ms_cum = 0 |
| | | beg_idx = 0 |
| | |
| | | |
| | | @staticmethod |
| | | def seq2arr(seq, vec_dim=8): |
| | | def int2vec(x, vec_dim=8, dtype=np.int32):
| | | b = ('{:0' + str(vec_dim) + 'b}').format(x) |
| | | # little-endian order: lower bit first |
| | | return (np.array(list(b)[::-1]) == '1').astype(dtype) |
| | |
| | | raise ValueError(f"Not supported rnn_type={rnn_type}") |
| | | |
| | | if subsample is None: |
| | | subsample = np.ones(num_layers + 1, dtype=np.int32)
| | | else: |
| | | subsample = subsample[:num_layers] |
| | | # Append 1 at the beginning because the second or later is used |
| | | subsample = np.pad( |
| | | np.array(subsample, dtype=np.int32),
| | | [1, num_layers - len(subsample)], |
| | | mode="constant", |
| | | constant_values=1, |
| | |
| | | for length in sorted(lengths, reverse=True): |
| | | lens = np.fromiter( |
| | | (e - s if e - s >= length + min_space else 0 for s, e in parts), |
| | | np.int32,
| | | ) |
| | | l_sum = np.sum(lens) |
| | | if l_sum == 0: |
| | |
| | | class MaskEstimator(torch.nn.Module): |
| | | def __init__(self, type, idim, layers, units, projs, dropout, nmask=1): |
| | | super().__init__() |
| | | subsample = np.ones(layers + 1, dtype=np.int32)
| | | |
| | | typ = type.lstrip("vgg").rstrip("p") |
| | | if type[-1] == "p": |
| | |
| | | |
| | | elif mode == "mt" and arch == "rnn": |
| | | # +1 means input (+1) and layers outputs (train_args.elayer) |
| | | subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
| | | logging.warning("Subsampling is not performed for machine translation.") |
| | | logging.info("subsample: " + " ".join([str(x) for x in subsample])) |
| | | return subsample |
| | |
| | | or (mode == "mt" and arch == "rnn") |
| | | or (mode == "st" and arch == "rnn") |
| | | ): |
| | | subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
| | | if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): |
| | | ss = train_args.subsample.split("_") |
| | | for j in range(min(train_args.elayers + 1, len(ss))): |
| | |
| | | |
| | | elif mode == "asr" and arch == "rnn_mix": |
| | | subsample = np.ones( |
| | | train_args.elayers_sd + train_args.elayers + 1, dtype=np.int32
| | | ) |
| | | if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"): |
| | | ss = train_args.subsample.split("_") |
| | |
| | | elif mode == "asr" and arch == "rnn_mulenc": |
| | | subsample_list = [] |
| | | for idx in range(train_args.num_encs): |
| | | subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int32)
| | | if train_args.etype[idx].endswith("p") and not train_args.etype[ |
| | | idx |
| | | ].startswith("vgg"): |
| | |
| | | return numel |
| | | |
| | | |
| | | def int2vec(x, vec_dim=8, dtype=np.int32):
| | | b = ('{:0' + str(vec_dim) + 'b}').format(x) |
| | | # little-endian order: lower bit first |
| | | return (np.array(list(b)[::-1]) == '1').astype(dtype) |