DeepLearningExamples/CUDA-Optimized/FastSpeech/scripts/prepare_dataset.sh
2020-07-31 14:59:15 +08:00

21 lines
550 B
Bash

#!/usr/bin/env bash
#
# Fetch and unpack the LJSpeech-1.1 dataset into the current working directory
# (skipped when the directory already exists), then split its metadata.csv
# into train and test partitions.
#
# Files written, relative to the current working directory:
#   LJSpeech-1.1/metadata_train.csv   lines 1-13000 of metadata.csv
#   LJSpeech-1.1/metadata_test.csv    lines 13001-13100 of metadata.csv
#
# Requires: wget, tar (with bzip2 support), sed.
set -euo pipefail

# Download dataset
DATADIR="LJSpeech-1.1"
BZ2ARCHIVE="${DATADIR}.tar.bz2"
ENDPOINT="http://data.keithito.com/data/speech/$BZ2ARCHIVE"
METADATA="${DATADIR}/metadata.csv"

if [[ ! -d "$DATADIR" ]]; then
  echo "dataset is missing, unpacking ..."
  if [[ ! -f "$BZ2ARCHIVE" ]]; then
    echo "dataset archive is missing, downloading ..."
    # Download under a temporary name and rename only after wget succeeds.
    # Otherwise an interrupted transfer would leave a truncated archive that
    # the -f check above accepts on the next run, and tar would then fail
    # until the file is removed by hand.
    wget -O "${BZ2ARCHIVE}.part" "$ENDPOINT"
    mv -- "${BZ2ARCHIVE}.part" "$BZ2ARCHIVE"
  fi
  tar jxvf "$BZ2ARCHIVE"
fi

# Fail before touching the partition files when the metadata is absent (e.g.
# the dataset directory exists but extraction did not complete); otherwise the
# output redirection below would create an empty metadata_train.csv first.
if [[ ! -f "$METADATA" ]]; then
  echo "error: ${METADATA} not found; remove ${DATADIR} and re-run" >&2
  exit 1
fi

# Partition dataset
# The first 13000 lines form the training split and the following 100 lines
# the test split (presumably metadata.csv holds 13100 lines in total; verify
# against the dataset release).
sed -n "1,13000p" "$METADATA" > "${DATADIR}/metadata_train.csv"
sed -n "13001,13100p" "$METADATA" > "${DATADIR}/metadata_test.csv"