| | |
| | | import collections.abc |
| | | from pathlib import Path |
| | | from typing import Union |
| | | from typing import List, Tuple, Union |
| | | |
| | | import random |
| | | import numpy as np |
| | |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | |
| | | def soundfile_read( |
| | | wavs: Union[str, List[str]], |
| | | dtype=None, |
| | | always_2d: bool = False, |
| | | concat_axis: int = 1, |
| | | start: int = 0, |
| | | end: int = None, |
| | | return_subtype: bool = False, |
| | | ) -> Tuple[np.array, int]: |
| | | if isinstance(wavs, str): |
| | | wavs = [wavs] |
| | | |
| | | arrays = [] |
| | | subtypes = [] |
| | | prev_rate = None |
| | | prev_wav = None |
| | | for wav in wavs: |
| | | with soundfile.SoundFile(wav) as f: |
| | | f.seek(start) |
| | | if end is not None: |
| | | frames = end - start |
| | | else: |
| | | frames = -1 |
| | | if dtype == "float16": |
| | | array = f.read( |
| | | frames, |
| | | dtype="float32", |
| | | always_2d=always_2d, |
| | | ).astype(dtype) |
| | | else: |
| | | array = f.read(frames, dtype=dtype, always_2d=always_2d) |
| | | rate = f.samplerate |
| | | subtype = f.subtype |
| | | subtypes.append(subtype) |
| | | |
| | | if len(wavs) > 1 and array.ndim == 1 and concat_axis == 1: |
| | | # array: (Time, Channel) |
| | | array = array[:, None] |
| | | |
| | | if prev_wav is not None: |
| | | if prev_rate != rate: |
| | | raise RuntimeError( |
| | | f"'{prev_wav}' and '{wav}' have mismatched sampling rate: " |
| | | f"{prev_rate} != {rate}" |
| | | ) |
| | | |
| | | dim1 = arrays[0].shape[1 - concat_axis] |
| | | dim2 = array.shape[1 - concat_axis] |
| | | if dim1 != dim2: |
| | | raise RuntimeError( |
| | | "Shapes must match with " |
| | | f"{1 - concat_axis} axis, but gut {dim1} and {dim2}" |
| | | ) |
| | | |
| | | prev_rate = rate |
| | | prev_wav = wav |
| | | arrays.append(array) |
| | | |
| | | if len(arrays) == 1: |
| | | array = arrays[0] |
| | | else: |
| | | array = np.concatenate(arrays, axis=concat_axis) |
| | | |
| | | if return_subtype: |
| | | return array, rate, subtypes |
| | | else: |
| | | return array, rate |
| | | |
| | | |
| | | class SoundScpReader(collections.abc.Mapping): |
| | | """Reader class for 'wav.scp'. |