DeepLearningExamples/PyTorch/LanguageModeling/BERT/data/bookcorpus/download_bookcorpus.sh

10 lines
277 B
Bash
Raw Normal View History

2019-07-08 22:51:28 +02:00
#! /bin/bash
# Download the BookCorpus books and merge them into a single text file.
#
# All relative paths used below (./download, ./clean_and_merge_text.py,
# bookcorpus.txt) are resolved against the current working directory, so run
# this script from the directory that contains clean_and_merge_text.py.
# The downloader and its URL list are read from /workspace/bookcorpus.

# Abort on the first failing command (also on unset variables and failed
# pipeline stages) so a failed download is never followed by a clean-and-merge
# pass over a missing or incomplete ./download directory, and so the script's
# exit status reflects the failure.
set -euo pipefail

# Download books
mkdir -p ./download
python3 /workspace/bookcorpus/download_files.py \
  --list /workspace/bookcorpus/url_list.jsonl \
  --out ./download \
  --trash-bad-count

# Clean and prep (one book per line)
python3 ./clean_and_merge_text.py ./download bookcorpus.txt