DeepLearningExamples/PyTorch/LanguageModeling/BERT/data/squad/squad_download.sh
Przemek Strzelczyk 0663b67c1a Updating models
2019-07-08 22:51:28 +02:00

61 lines
2 KiB
Bash
Executable file

#!/usr/bin/env bash
echo "Downloading dataset for squad..."
# Download SQuAD
v1="v1.1"
mkdir $v1
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O $v1/train-v1.1.json
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O $v1/dev-v1.1.json
wget https://worksheets.codalab.org/rest/bundles/0xbcd57bee090b421c982906709c8c27e1/contents/blob/ -O $v1/evaluate-v1.1.py
EXP_TRAIN_v1='981b29407e0affa3b1b156f72073b945 -'
EXP_DEV_v1='3e85deb501d4e538b6bc56f786231552 -'
EXP_EVAL_v1='afb04912d18ff20696f7f88eed49bea9 -'
CALC_TRAIN_v1=`cat ${v1}/train-v1.1.json |md5sum`
CALC_DEV_v1=`cat ${v1}/dev-v1.1.json |md5sum`
CALC_EVAL_v1=`cat ${v1}/evaluate-v1.1.py |md5sum`
v2="v2.0"
mkdir $v2
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O $v2/train-v2.0.json
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O $v2/dev-v2.0.json
wget https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/ -O $v2/evaluate-v2.0.py
EXP_TRAIN_v2='62108c273c268d70893182d5cf8df740 -'
EXP_DEV_v2='246adae8b7002f8679c027697b0b7cf8 -'
EXP_EVAL_v2='ff23213bed5516ea4a6d9edb6cd7d627 -'
CALC_TRAIN_v2=`cat ${v2}/train-v2.0.json |md5sum`
CALC_DEV_v2=`cat ${v2}/dev-v2.0.json |md5sum`
CALC_EVAL_v2=`cat ${v2}/evaluate-v2.0.py |md5sum`
echo "Squad data download done!"
echo "Verifying Dataset...."
if [ "$EXP_TRAIN_v1" != "$CALC_TRAIN_v1" ]; then
echo "train-v1.1.json is corrupted! md5sum doesn't match"
fi
if [ "$EXP_DEV_v1" != "$CALC_DEV_v1" ]; then
echo "dev-v1.1.json is corrupted! md5sum doesn't match"
fi
if [ "$EXP_EVAL_v1" != "$CALC_EVAL_v1" ]; then
echo "evaluate-v1.1.py is corrupted! md5sum doesn't match"
fi
if [ "$EXP_TRAIN_v2" != "$CALC_TRAIN_v2" ]; then
echo "train-v2.0.json is corrupted! md5sum doesn't match"
fi
if [ "$EXP_DEV_v2" != "$CALC_DEV_v2" ]; then
echo "dev-v2.0.json is corrupted! md5sum doesn't match"
fi
if [ "$EXP_EVAL_v2" != "$CALC_EVAL_v2" ]; then
echo "evaluate-v2.0.py is corrupted! md5sum doesn't match"
fi
echo "Complete!"