# Fine-tuning run configuration (Gin, dynamic registration).
from __gin__ import dynamic_registration

# NOTE(review): tasks_v4 is a project-local module; presumably it registers the
# task named by MIXTURE_OR_TASK_NAME - confirm.
import tasks_v4
import __main__ as train_script
from t5.data import mixtures
from t5x import models
from t5x import partitioning
from t5x import utils

# Model definition first, then the generic fine-tuning run settings.
include "t5x/examples/t5/mt5/large.gin"
include "t5x/configs/runs/finetune.gin"
# Macro overrides for this run.
MIXTURE_OR_TASK_NAME = "sentencefix"
TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
# Total step count, including the steps already in the restored checkpoint:
# 1000000 pre-trained steps + 200000 fine-tuning steps.
TRAIN_STEPS = 1_200_000
USE_CACHED_TASKS = False
DROPOUT_RATE = 0.0
RANDOM_SEED = 0
# `LOSS_NORMALIZING_FACTOR`: When fine-tuning a model that was pre-trained
# using Mesh TensorFlow (e.g. the public T5 / mT5 / ByT5 models), this should be
# set to `pretraining batch_size` * `target_token_length`. For T5 and T5.1.1:
# `2048 * 114`. For mT5: `1024 * 229`. For ByT5: `1024 * 189`.
# NOTE(review): the mT5 value below (234496 = 1024 * 229) is commented out, so
# this file does not set the factor itself - confirm that this is intentional.
#LOSS_NORMALIZING_FACTOR = 234496
# Checkpoint to start fine-tuning from (step 1000000 of mt5_large).
INITIAL_CHECKPOINT_PATH = "gs://t5-data/pretrained_models/t5x/mt5_large/checkpoint_1000000"
# Binds `eval_period` of the training entry point.
# NOTE(review): presumably the number of training steps between evaluations -
# confirm against the train script's signature.
train_script.train:
  eval_period = 100
# Restore from the checkpoint named by INITIAL_CHECKPOINT_PATH.
# NOTE(review): 'specific' presumably means "load exactly this checkpoint path"
# as opposed to searching a directory - confirm in the T5X documentation.
utils.RestoreCheckpointConfig:
  path = %INITIAL_CHECKPOINT_PATH
  mode = 'specific'
# Wire the three dataset configs into the training entry point. Each one is a
# separately scoped utils.DatasetConfig (`train`, `train_eval`, `infer_eval`).
train_script.train:
  train_dataset_cfg = @train/utils.DatasetConfig()
  train_eval_dataset_cfg = @train_eval/utils.DatasetConfig()
  infer_eval_dataset_cfg = @infer_eval/utils.DatasetConfig()
# Binds `num_decodes` for EncoderDecoderModel.predict_batch_with_aux.
# NOTE(review): presumably the number of decoded candidates per example during
# prediction - confirm against t5x.models.
models.EncoderDecoderModel.predict_batch_with_aux.num_decodes = 4
# DatasetConfig in the `infer_eval` scope: reads the 'validation' split of the
# task, unshuffled and unpacked, with a fixed seed.
# %MIXTURE_OR_TASK_MODULE is not defined in this file; it must come from an
# included config or the command line.
infer_eval/utils.DatasetConfig:
  mixture_or_task_name = %MIXTURE_OR_TASK_NAME
  task_feature_lengths = %TASK_FEATURE_LENGTHS
  split = 'validation'
  batch_size = 64
  shuffle = False
  seed = 42
  use_cached = %USE_CACHED_TASKS
  pack = False
  module = %MIXTURE_OR_TASK_MODULE
# Evaluator settings: three logger classes, no cap on the number of examples.
# NOTE(review): this file never imports `seqio` itself; confirm that an included
# config makes it resolvable under dynamic registration, or add `import seqio`.
seqio.Evaluator:
  logger_cls = [
      @seqio.PyLoggingLogger,
      @seqio.TensorBoardLogger,
      @seqio.JSONLogger,
  ]
  num_examples = None  # Use all examples in the dataset.
  use_memory_cache = True
# Binds `num_partitions` of the pjit partitioner.
# NOTE(review): presumably the model-parallel partition count - confirm it
# matches the available device topology.
partitioning.PjitPartitioner.num_partitions = 4