multiple fixes

- __pycache__/tasks_v4.cpython-38.pyc +0 -0
- finetune_large_mt5_sentencefix.gin +2 -2
- finetune_large_mt5_sentencefix_v4.gin +2 -2
- finetune_mt5_sentencefix.gin +1 -1
- finetune_mt5_sentencefix_v4.gin +1 -1
- finetune_small_mt5_sentencefix.gin +1 -1
- finetune_small_mt5_sentencefix_v4.gin +1 -1
- tasks_v4.py +1 -1
- train_large_v4.sh +0 -1
- train_small_v4.sh +11 -0
- train_v4.sh +0 -1
__pycache__/tasks_v4.cpython-38.pyc
CHANGED
Binary files a/__pycache__/tasks_v4.cpython-38.pyc and b/__pycache__/tasks_v4.cpython-38.pyc differ
finetune_large_mt5_sentencefix.gin
CHANGED
@@ -7,12 +7,12 @@ from t5x import models
 from t5x import partitioning
 from t5x import utils
 
-include "t5x/examples/t5/mt5/
+include "t5x/examples/t5/mt5/large.gin"
 include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
finetune_large_mt5_sentencefix_v4.gin
CHANGED
@@ -7,12 +7,12 @@ from t5x import models
 from t5x import partitioning
 from t5x import utils
 
-include "t5x/examples/t5/mt5/
+include "t5x/examples/t5/mt5/large.gin"
 include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
finetune_mt5_sentencefix.gin
CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
finetune_mt5_sentencefix_v4.gin
CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
finetune_small_mt5_sentencefix.gin
CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 20000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
finetune_small_mt5_sentencefix_v4.gin
CHANGED
@@ -12,7 +12,7 @@ include "t5x/configs/runs/finetune.gin"
 
 MIXTURE_OR_TASK_NAME = "sentencefix"
 TASK_FEATURE_LENGTHS = {"inputs": 256, "targets": 256}
-TRAIN_STEPS =
+TRAIN_STEPS = 1_200_000 # 1000000 pre-trained steps + 200000 fine-tuning steps.
 USE_CACHED_TASKS = False
 DROPOUT_RATE = 0.0
 RANDOM_SEED = 0
tasks_v4.py
CHANGED
@@ -59,7 +59,7 @@ seqio.TaskRegistry.add(
         sentencefix_preprocessor,
         seqio.preprocessors.tokenize_and_append_eos,
     ],
-
+    metric_fns=[metrics.bleu],
     output_features=DEFAULT_OUTPUT_FEATURES,
 )
 
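For context, the sketch below shows where the new metric_fns argument sits in a seqio task registration. It is a minimal illustration, not the repository's actual tasks_v4.py: the TFDS dataset name is a placeholder, the custom sentencefix_preprocessor is omitted, and the mT5 SentencePiece vocabulary path is assumed from the standard t5x mT5 configs.

import seqio
from t5.evaluation import metrics

# Assumed mT5 vocabulary, as used by the standard t5x mT5 gin configs.
VOCAB = seqio.SentencePieceVocabulary(
    "gs://t5-data/vocabs/mc4.250000.100extra/sentencepiece.model")

DEFAULT_OUTPUT_FEATURES = {
    "inputs": seqio.Feature(vocabulary=VOCAB, add_eos=True),
    "targets": seqio.Feature(vocabulary=VOCAB, add_eos=True),
}

seqio.TaskRegistry.add(
    "sentencefix",
    # Placeholder source; the real task defines its own source (and a
    # sentencefix_preprocessor) in tasks_v4.py.
    source=seqio.TfdsDataSource(tfds_name="sentencefix:1.0.0"),
    preprocessors=[
        seqio.preprocessors.tokenize_and_append_eos,
    ],
    # The line this commit adds: compute BLEU when the task is evaluated.
    metric_fns=[metrics.bleu],
    output_features=DEFAULT_OUTPUT_FEATURES,
)
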
train_large_v4.sh
CHANGED
@@ -1,6 +1,5 @@
 PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
 T5X_DIR="../../t5x" # directory where the t5x is cloned.
-TFDS_DATA_DIR="gs://nb-t5x-us-central2/corpus_multi_sentencefix_mt5"
 MODEL_DIR="gs://nb-t5x-us-central2/large_model_multi_sentencefix_mt5"
 export PYTHONPATH=${PROJECT_DIR}
 
train_small_v4.sh
ADDED
@@ -0,0 +1,11 @@
+PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
+T5X_DIR="../../t5x" # directory where the t5x is cloned.
+MODEL_DIR="gs://nb-t5x-us-central2/small_model_multi_sentencefix_mt5"
+export PYTHONPATH=${PROJECT_DIR}
+
+python3 ${T5X_DIR}/t5x/train.py \
+  --gin_search_paths=${PROJECT_DIR} \
+  --gin_file="finetune_small_mt5_sentencefix_v4.gin" \
+  --gin.MODEL_DIR="'${MODEL_DIR}'" \
+  --tfds_data_dir=${TFDS_DATA_DIR}
+
train_v4.sh
CHANGED
@@ -1,6 +1,5 @@
 PROJECT_DIR=${HOME}"/models/multi-sentencefix-mt5"
 T5X_DIR="../../t5x" # directory where the t5x is cloned.
-TFDS_DATA_DIR="gs://nb-t5x-us-central2/corpus_multi_sentencefix_mt5"
 MODEL_DIR="gs://nb-t5x-us-central2/model_multi_sentencefix_mt5"
 export PYTHONPATH=${PROJECT_DIR}
 