DeepLearningExamples/PyTorch/LanguageModeling/BERT/data/bookcorpus/download_bookcorpus.sh
Przemek Strzelczyk 0663b67c1a Updating models
2019-07-08 22:51:28 +02:00

10 lines
277 B
Bash
Executable file

#!/bin/bash
# Download the BookCorpus books and merge them into a single text file.
#
# Run this from the directory that contains clean_and_merge_text.py: that
# script, the ./download directory and the resulting bookcorpus.txt are all
# resolved relative to the current working directory. The downloader and its
# URL list are read from /workspace/bookcorpus.

# Abort at the first failing step. Without this, a downloader that exits with
# an error would still be followed by the clean/merge step, which could write
# an incomplete bookcorpus.txt while the script as a whole reports success.
set -euo pipefail

# Download books
mkdir -p ./download
python3 /workspace/bookcorpus/download_files.py \
  --list /workspace/bookcorpus/url_list.jsonl \
  --out ./download \
  --trash-bad-count

# Clean and prep (one book per line)
python3 ./clean_and_merge_text.py ./download bookcorpus.txt