#!/usr/bin/env bash
# Turn a Kaldi-style 'wav.scp' (optionally with a 'segments' file) into
# per-job formatted outputs and merge them back into a single scp.
#
# Positional arguments:
#   $1  input wav.scp (must be an existing file)
#   $2  output data directory; receives ${out_filename} and, when
#       write_utt2num_samples is true, utt2num_samples
#   $3  (optional) log directory, default: $2/logs
#   $4  (optional) directory for the per-job results, default: $2/data
#
# Options are read by utils/parse_options.sh and override the defaults
# assigned below (e.g. --nj, --fs, --segments, --cmd).
#
# Exits non-zero on bad arguments, a missing input file, or any failed step.
set -euo pipefail
SECONDS=0

# Print a timestamped message tagged with the caller's file, line and function.
log() {
    local fname=${BASH_SOURCE[1]##*/}
    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# The here-doc delimiter is unquoted on purpose so that $0 is expanded.
help_message=$(cat << EOF
Usage: $0 <in-wav.scp> <out-datadir> [<logdir> [<outdir>]]
e.g.
$0 data/test/wav.scp data/test_format/

Format 'wav.scp': In short words,
changing "kaldi-datadir" to "modified-kaldi-datadir"

The 'wav.scp' format in kaldi is very flexible,
e.g. It can use unix-pipe as describing that wav file,
but it sometime looks confusing and make scripts more complex.
This tools creates actual wav files from 'wav.scp'
and also segments wav files using 'segments'.

Options
  --fs <fs>
  --segments <segments>
  --nj <nj>
  --cmd <cmd>
EOF
)

# Defaults; each can be overridden on the command line via parse_options.sh.
out_filename=wav.scp
cmd=utils/run.pl
nj=30
fs=none
segments=

ref_channels=
utt2ref_channels=

audio_format=wav
write_utt2num_samples=true

log "$0 $*"
. utils/parse_options.sh

# Accept exactly 2, 3 or 4 positional arguments.
if [ $# -lt 2 ] || [ $# -gt 4 ]; then
    log "${help_message}"
    log "Error: invalid command line arguments"
    exit 1
fi

. ./path.sh  # Setup the environment

scp=$1
if [ ! -f "${scp}" ]; then
    log "${help_message}"
    log "Error: No such file: ${scp}"
    exit 1
fi

dir=$2
logdir=${3:-${dir}/logs}
outdir=${4:-${dir}/data}

# ${dir} is created too: the merged scp is written there at the very end, and
# it is not implied by ${logdir} when an explicit log directory is passed.
mkdir -p "${dir}" "${logdir}"

# Remove a stale result so a failed run cannot be mistaken for a good one.
rm -f "${dir}/${out_filename}"

# Extra flags for the formatter, kept as an array so that values containing
# whitespace stay intact. utt2ref_channels takes precedence over ref_channels.
opts=()
if [ -n "${utt2ref_channels}" ]; then
    opts+=(--utt2ref-channels "${utt2ref_channels}")
elif [ -n "${ref_channels}" ]; then
    opts+=(--ref-channels "${ref_channels}")
fi

# NOTE(review): in the copy of this file under review, the text between
# "nj=\$((nj" and "/dev/null" was missing (everything between a '<' and the
# next '>' appears to have been stripped). The splitting and job-launch steps
# in this if/else are a reconstruction; confirm the helper paths
# (utils/split_scp.pl, pyscripts/audio/format_wav_scp.py) and their flags
# against the project before merging.
if [ -n "${segments}" ]; then
    log "[info]: using ${segments}"
    nutt=$(wc -l < "${segments}")
    # Never start more jobs than there are entries to process.
    nj=$((nj < nutt ? nj : nutt))
    if [ "${nj}" -lt 1 ]; then
        log "Error: nothing to process in ${segments} (nj=${nj})"
        exit 1
    fi

    split_segments=()
    for ((n = 1; n <= nj; n++)); do
        split_segments+=("${logdir}/segments.${n}")
    done

    utils/split_scp.pl "${segments}" "${split_segments[@]}"

    # ${cmd} is intentionally unquoted so that a --cmd value carrying extra
    # arguments is split into separate words. The ${opts[@]+...} form keeps
    # an empty array from tripping 'set -u' on older bash versions.
    # shellcheck disable=SC2086
    ${cmd} "JOB=1:${nj}" "${logdir}/format_wav_scp.JOB.log" \
        pyscripts/audio/format_wav_scp.py \
            ${opts[@]+"${opts[@]}"} \
            --fs "${fs}" \
            --audio-format "${audio_format}" \
            "--segment=${logdir}/segments.JOB" \
            "${scp}" "${outdir}/format.JOB"
else
    log "[info]: without segments"
    nutt=$(wc -l < "${scp}")
    # Never start more jobs than there are entries to process.
    nj=$((nj < nutt ? nj : nutt))
    if [ "${nj}" -lt 1 ]; then
        log "Error: nothing to process in ${scp} (nj=${nj})"
        exit 1
    fi

    split_scps=()
    for ((n = 1; n <= nj; n++)); do
        split_scps+=("${logdir}/wav.${n}.scp")
    done

    utils/split_scp.pl "${scp}" "${split_scps[@]}"

    # See the note on ${cmd} and ${opts[@]+...} in the branch above.
    # shellcheck disable=SC2086
    ${cmd} "JOB=1:${nj}" "${logdir}/format_wav_scp.JOB.log" \
        pyscripts/audio/format_wav_scp.py \
            ${opts[@]+"${opts[@]}"} \
            --fs "${fs}" \
            --audio-format "${audio_format}" \
            "${logdir}/wav.JOB.scp" "${outdir}/format.JOB"
fi

# NOTE(review): presumably a directory listing to refresh a network
# filesystem's view of the job outputs before they are read — confirm.
ls "${outdir}"/format.* > /dev/null

# concatenate the .scp files together.
for ((n = 1; n <= nj; n++)); do
    cat "${outdir}/format.${n}/wav.scp" || exit 1
done > "${dir}/${out_filename}" || exit 1

# Compare as a string instead of executing the option's value as a command.
if [ "${write_utt2num_samples}" = true ]; then
    for ((n = 1; n <= nj; n++)); do
        cat "${outdir}/format.${n}/utt2num_samples" || exit 1
    done > "${dir}/utt2num_samples" || exit 1
fi

log "Successfully finished. [elapsed=${SECONDS}s]"