chore(transformer-xl): Initial commit
This commit is contained in:
parent
ef4684ef39
commit
10512876f2
46 changed files with 10547 additions and 0 deletions
87
transformer-xl/tf/sota/download.sh
Normal file
87
transformer-xl/tf/sota/download.sh
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
#!/bin/bash
# Fetch the pretrained Transformer-XL checkpoints and preprocessed corpora
# released by the paper authors, into ${DATA_ROOT}/pretrained_xl/.

# Base URL hosting the pretrained models and data.
URL=http://curtis.ml.cmu.edu/datasets/pretrained_xl

# Root directory under which everything is downloaded.
DATA_ROOT=./
|
||||
|
||||
# Download the file at URL $1 into the current directory, unless a file
# with the same basename already exists there (idempotent re-runs).
#   $1 - full URL of the file to fetch
# Outputs a progress/skip message on stdout.
function download () {
  local fileurl=${1}
  # Basename of the URL: strip the longest prefix ending in '/'.
  local filename=${fileurl##*/}
  if [ ! -f "${filename}" ]; then
    echo ">>> Download '${filename}' from '${fileurl}'."
    wget --quiet "${fileurl}"
  else
    echo "*** File '${filename}' exists. Skip."
  fi
}
|
||||
|
||||
# All paths below are relative: each dataset section cd's into its own
# subtree and must cd back out, so the statement order matters.
# NOTE(review): the remote paths use 'tf_enwiki8' while the local directory
# is 'tf_enwik8' — confirm the server-side spelling is really 'tf_enwiki8'.
cd $DATA_ROOT
mkdir -p pretrained_xl && cd pretrained_xl

# enwik8
mkdir -p tf_enwik8 && cd tf_enwik8

# Preprocessed corpus cache + vocabulary info.
mkdir -p data && cd data
download ${URL}/tf_enwiki8/data/cache.pkl
download ${URL}/tf_enwiki8/data/corpus-info.json
cd ..

# Pretrained TF checkpoint (step 0 of the released fine-tuned model).
mkdir -p model && cd model
download ${URL}/tf_enwiki8/model/checkpoint
download ${URL}/tf_enwiki8/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_enwiki8/model/model.ckpt-0.index
download ${URL}/tf_enwiki8/model/model.ckpt-0.meta
cd ..

cd ..

# text8
mkdir -p tf_text8 && cd tf_text8

mkdir -p data && cd data
download ${URL}/tf_text8/data/cache.pkl
download ${URL}/tf_text8/data/corpus-info.json
cd ..

mkdir -p model && cd model
download ${URL}/tf_text8/model/checkpoint
download ${URL}/tf_text8/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_text8/model/model.ckpt-0.index
download ${URL}/tf_text8/model/model.ckpt-0.meta
cd ..

cd ..

# wt103
mkdir -p tf_wt103 && cd tf_wt103

mkdir -p data && cd data
download ${URL}/tf_wt103/data/cache.pkl
download ${URL}/tf_wt103/data/corpus-info.json
cd ..

mkdir -p model && cd model
download ${URL}/tf_wt103/model/checkpoint
download ${URL}/tf_wt103/model/model.ckpt-0.data-00000-of-00001
download ${URL}/tf_wt103/model/model.ckpt-0.index
download ${URL}/tf_wt103/model/model.ckpt-0.meta
cd ..

cd ..

# lm1b
mkdir -p tf_lm1b && cd tf_lm1b

mkdir -p data && cd data
download ${URL}/tf_lm1b/data/cache.pkl
download ${URL}/tf_lm1b/data/corpus-info.json
cd ..

# lm1b checkpoint is at training step 1191000, unlike the others (step 0).
mkdir -p model && cd model
download ${URL}/tf_lm1b/model/checkpoint
download ${URL}/tf_lm1b/model/model.ckpt-1191000.data-00000-of-00001
download ${URL}/tf_lm1b/model/model.ckpt-1191000.index
download ${URL}/tf_lm1b/model/model.ckpt-1191000.meta
cd ..

cd ..
||||
58
transformer-xl/tf/sota/enwik8.sh
Normal file
58
transformer-xl/tf/sota/enwik8.sh
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#!/bin/bash
# Evaluate the pretrained Transformer-XL enwik8 model on the test split.
# Expects download.sh to have populated ${DATA_ROOT}/pretrained_xl/tf_enwik8.
# Abort immediately if preprocessing fails, instead of evaluating on
# missing/stale tfrecords.
set -euo pipefail

# Data
DATA_ROOT=./
DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_enwik8/data
MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_enwik8/model

# Model — must match the architecture of the released checkpoint.
N_LAYER=24
D_MODEL=1024
D_EMBED=1024
N_HEAD=8
D_HEAD=128
D_INNER=3072

# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=3800
TEST_CLAMP_LEN=1000

TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0
TEST_BSZ=16
TEST_NUM_CORE=2


echo 'Preprocess test set...'
python data_utils.py \
  --data_dir="${DATA_DIR}/" \
  --dataset=enwik8 \
  --tgt_len="${TEST_TGT_LEN}" \
  --per_host_test_bsz="${TEST_BSZ}" \
  --num_passes=1 \
  --use_tpu=False

echo 'Run evaluation on test set...'
python train_gpu.py \
  --data_dir="${DATA_DIR}/tfrecords" \
  --record_info_dir="${DATA_DIR}/tfrecords/" \
  --corpus_info_path="${DATA_DIR}/corpus-info.json" \
  --eval_ckpt_path="${TEST_CKPT_PATH}" \
  --model_dir=EXP-enwik8 \
  --n_layer="${N_LAYER}" \
  --d_model="${D_MODEL}" \
  --d_embed="${D_EMBED}" \
  --n_head="${N_HEAD}" \
  --d_head="${D_HEAD}" \
  --d_inner="${D_INNER}" \
  --dropout=0.0 \
  --dropatt=0.0 \
  --tgt_len="${TEST_TGT_LEN}" \
  --mem_len="${TEST_MEM_LEN}" \
  --clamp_len="${TEST_CLAMP_LEN}" \
  --same_length=True \
  --eval_batch_size="${TEST_BSZ}" \
  --num_core_per_host="${TEST_NUM_CORE}" \
  --do_train=False \
  --do_eval=True \
  --eval_split=test
63
transformer-xl/tf/sota/lm1b.sh
Normal file
63
transformer-xl/tf/sota/lm1b.sh
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
#!/bin/bash
# Evaluate the pretrained Transformer-XL lm1b model on the test split.
# Expects download.sh to have populated ${DATA_ROOT}/pretrained_xl/tf_lm1b.
# Abort immediately if preprocessing fails, instead of evaluating on
# missing/stale tfrecords.
set -euo pipefail

# Data
DATA_ROOT=./
DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_lm1b/data
MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_lm1b/model

# Model — must match the architecture of the released checkpoint.
DIV_VAL=4
N_LAYER=24
D_MODEL=1280
D_EMBED=1280
N_HEAD=16
D_HEAD=80
D_INNER=8192

# Testing
TEST_TGT_LEN=32
TEST_MEM_LEN=128
TEST_CLAMP_LEN=-1

# lm1b checkpoint is at step 1191000 (the other datasets ship step 0).
TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-1191000
TEST_BSZ=16
TEST_NUM_CORE=1


echo 'Preprocess test set...'
python data_utils.py \
  --data_dir="${DATA_DIR}/" \
  --dataset=lm1b \
  --tgt_len="${TEST_TGT_LEN}" \
  --per_host_test_bsz="${TEST_BSZ}" \
  --num_passes=1 \
  --use_tpu=False

echo 'Run evaluation on test set...'
python train_gpu.py \
  --data_dir="${DATA_DIR}/tfrecords" \
  --record_info_dir="${DATA_DIR}/tfrecords/" \
  --corpus_info_path="${DATA_DIR}/corpus-info.json" \
  --eval_ckpt_path="${TEST_CKPT_PATH}" \
  --model_dir=EXP-lm1b \
  --div_val="${DIV_VAL}" \
  --untie_r=True \
  --proj_share_all_but_first=False \
  --proj_same_dim=False \
  --n_layer="${N_LAYER}" \
  --d_model="${D_MODEL}" \
  --d_embed="${D_EMBED}" \
  --n_head="${N_HEAD}" \
  --d_head="${D_HEAD}" \
  --d_inner="${D_INNER}" \
  --dropout=0.0 \
  --dropatt=0.0 \
  --tgt_len="${TEST_TGT_LEN}" \
  --mem_len="${TEST_MEM_LEN}" \
  --clamp_len="${TEST_CLAMP_LEN}" \
  --same_length=True \
  --eval_batch_size="${TEST_BSZ}" \
  --num_core_per_host="${TEST_NUM_CORE}" \
  --do_train=False \
  --do_eval=True \
  --eval_split=test
58
transformer-xl/tf/sota/text8.sh
Normal file
58
transformer-xl/tf/sota/text8.sh
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
#!/bin/bash
# Evaluate the pretrained Transformer-XL text8 model on the test split.
# Expects download.sh to have populated ${DATA_ROOT}/pretrained_xl/tf_text8.
# Abort immediately if preprocessing fails, instead of evaluating on
# missing/stale tfrecords.
set -euo pipefail

# Data
DATA_ROOT=./
DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_text8/data
MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_text8/model

# Model — must match the architecture of the released checkpoint.
N_LAYER=24
D_MODEL=1024
D_EMBED=1024
N_HEAD=8
D_HEAD=128
D_INNER=3072

# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=3800
TEST_CLAMP_LEN=1000

TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0
TEST_BSZ=16
TEST_NUM_CORE=2


echo 'Preprocess test set...'
python data_utils.py \
  --data_dir="${DATA_DIR}/" \
  --dataset=text8 \
  --tgt_len="${TEST_TGT_LEN}" \
  --per_host_test_bsz="${TEST_BSZ}" \
  --num_passes=1 \
  --use_tpu=False

echo 'Run evaluation on test set...'
python train_gpu.py \
  --data_dir="${DATA_DIR}/tfrecords" \
  --record_info_dir="${DATA_DIR}/tfrecords/" \
  --corpus_info_path="${DATA_DIR}/corpus-info.json" \
  --eval_ckpt_path="${TEST_CKPT_PATH}" \
  --model_dir=EXP-text8 \
  --n_layer="${N_LAYER}" \
  --d_model="${D_MODEL}" \
  --d_embed="${D_EMBED}" \
  --n_head="${N_HEAD}" \
  --d_head="${D_HEAD}" \
  --d_inner="${D_INNER}" \
  --dropout=0.0 \
  --dropatt=0.0 \
  --tgt_len="${TEST_TGT_LEN}" \
  --mem_len="${TEST_MEM_LEN}" \
  --clamp_len="${TEST_CLAMP_LEN}" \
  --same_length=True \
  --eval_batch_size="${TEST_BSZ}" \
  --num_core_per_host="${TEST_NUM_CORE}" \
  --do_train=False \
  --do_eval=True \
  --eval_split=test
71
transformer-xl/tf/sota/wt103.sh
Normal file
71
transformer-xl/tf/sota/wt103.sh
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash
# Evaluate the pretrained Transformer-XL wt103 (WikiText-103) model on the
# test split. Expects download.sh to have populated
# ${DATA_ROOT}/pretrained_xl/tf_wt103.
# Abort immediately if preprocessing fails, instead of evaluating on
# missing/stale tfrecords.
set -euo pipefail

# Data
DATA_ROOT=./
DATA_DIR=${DATA_ROOT}/pretrained_xl/tf_wt103/data
MODEL_DIR=${DATA_ROOT}/pretrained_xl/tf_wt103/model

# Model — must match the architecture of the released checkpoint.
DIV_VAL=4
N_LAYER=18
D_MODEL=1024
D_EMBED=1024
N_HEAD=16
D_HEAD=64
D_INNER=4096

# Training — not used by the evaluation commands below; kept for reference.
TGT_LEN=256
MEM_LEN=256

BSZ=16
NUM_CORE=2

# Testing
TEST_TGT_LEN=128
TEST_MEM_LEN=1600
TEST_CLAMP_LEN=1000

TEST_CKPT_PATH=${MODEL_DIR}/model.ckpt-0
TEST_BSZ=16
TEST_NUM_CORE=1


echo 'Preprocess test set...'
python data_utils.py \
  --data_dir="${DATA_DIR}/" \
  --dataset=wt103 \
  --tgt_len="${TEST_TGT_LEN}" \
  --per_host_test_bsz="${TEST_BSZ}" \
  --num_passes=1 \
  --use_tpu=False
# BUG FIX: this script previously passed --dataset=enwik8 even though
# DATA_DIR points at tf_wt103; preprocessing must use the wt103 corpus.


echo 'Run evaluation on test set...'
python train_gpu.py \
  --data_dir="${DATA_DIR}/tfrecords" \
  --record_info_dir="${DATA_DIR}/tfrecords/" \
  --corpus_info_path="${DATA_DIR}/corpus-info.json" \
  --eval_ckpt_path="${TEST_CKPT_PATH}" \
  --model_dir=EXP-wt103 \
  --div_val="${DIV_VAL}" \
  --untie_r=True \
  --proj_share_all_but_first=True \
  --n_layer="${N_LAYER}" \
  --d_model="${D_MODEL}" \
  --d_embed="${D_EMBED}" \
  --n_head="${N_HEAD}" \
  --d_head="${D_HEAD}" \
  --d_inner="${D_INNER}" \
  --dropout=0.0 \
  --dropatt=0.0 \
  --tgt_len="${TEST_TGT_LEN}" \
  --mem_len="${TEST_MEM_LEN}" \
  --clamp_len="${TEST_CLAMP_LEN}" \
  --same_length=True \
  --eval_batch_size="${TEST_BSZ}" \
  --num_core_per_host="${TEST_NUM_CORE}" \
  --do_train=False \
  --do_eval=True \
  --eval_split=test
|
||||
Reference in a new issue