pietrolesci committed
Commit 7806f40 • 1 Parent(s): 253cf88
Upload folder using huggingface_hub
- README.md +46 -0
- hparams.yaml +63 -0
- main.log +140 -0
- tb_logs/version_0/events.out.tfevents.1713881489.dev-gpu-pl487.1865581.0 +3 -0
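The commit message refers to the `huggingface_hub` upload API. A minimal sketch of pushing a run folder this way is given below; the local folder path and target repo id are illustrative assumptions, not values recorded in this commit.

```python
from huggingface_hub import HfApi

# Sketch: push a local run directory to the Hub with huggingface_hub.
# folder_path and repo_id are assumed for illustration only.
api = HfApi()
api.upload_folder(
    folder_path="outputs/roberta-base_2024-04-23T15-11-06",  # assumed local run dir
    repo_id="pietrolesci/mnli_roberta-base",                  # assumed target repo
    commit_message="Upload folder using huggingface_hub",
)
```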
README.md
ADDED
@@ -0,0 +1,46 @@
## Run info
- complete_hash: b97998c098e0d34fa00d918d670b8f9b
- short_hash: b9799b8f9b

### Configuration
```yaml
data:
  batch_size: 32
  data_seed: 42
  drop_last: false
  eval_batch_size: 128
  max_length: 512
  multiprocessing_context: null
  num_workers: 8
  persistent_workers: false
  pin_memory: true
  replacement: false
  shuffle: true
dataset: mnli
estimator:
  accelerator: gpu
  convert_to_bettertransformer: false
  deterministic: true
  precision: bf16-true
  tf32_mode: high
fit:
  enable_progress_bar: true
  limit_train_batches: null
  log_interval: 100
  max_epochs: 20
  min_epochs: null
  optimizer_kwargs:
    init_kwargs:
      fused: true
    lr: 3.0e-05
    name: adamw
  scheduler_kwargs:
    name: constant_schedule_with_warmup
    num_warmup_steps: 2000
model:
  base_model: roberta-base
  name: roberta-base
  revision: null
  seed: 42
seed: 42
```
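The `optimizer_kwargs` and `scheduler_kwargs` entries map onto standard PyTorch and `transformers` objects. The sketch below shows one way such a block could be turned into an optimizer and learning-rate schedule; it assumes the config is available as the companion `hparams.yaml` file, and the project's actual training code (not part of this commit) may differ.

```python
import yaml
import torch
from transformers import AutoModelForSequenceClassification, get_constant_schedule_with_warmup

# Load the run configuration shipped with this commit.
with open("hparams.yaml") as f:
    cfg = yaml.safe_load(f)

# MNLI has 3 labels (entailment / neutral / contradiction).
model = AutoModelForSequenceClassification.from_pretrained(cfg["model"]["name"], num_labels=3)

# adamw with lr=3e-5 and fused=True, as in the config above.
opt_cfg = cfg["fit"]["optimizer_kwargs"]
optimizer = torch.optim.AdamW(model.parameters(), lr=opt_cfg["lr"], **opt_cfg["init_kwargs"])

# constant_schedule_with_warmup with 2000 warmup steps.
sched_cfg = cfg["fit"]["scheduler_kwargs"]
scheduler = get_constant_schedule_with_warmup(optimizer, num_warmup_steps=sched_cfg["num_warmup_steps"])
```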
hparams.yaml
ADDED
@@ -0,0 +1,63 @@
experiment_group: training
run_name: roberta-base_2024-04-23T15-11-06
seed: 42
model:
  name: roberta-base
  revision: null
  seed: 42
  base_model: roberta-base
estimator:
  accelerator: gpu
  precision: bf16-true
  deterministic: true
  tf32_mode: high
  convert_to_bettertransformer: false
fit:
  max_epochs: 20
  min_epochs: null
  optimizer_kwargs:
    name: adamw
    lr: 3.0e-05
    init_kwargs:
      fused: true
  scheduler_kwargs:
    name: constant_schedule_with_warmup
    num_warmup_steps: 2000
  log_interval: 100
  enable_progress_bar: true
  limit_train_batches: null
data:
  batch_size: 32
  eval_batch_size: 128
  shuffle: true
  replacement: false
  data_seed: 42
  drop_last: false
  num_workers: 8
  pin_memory: true
  persistent_workers: false
  multiprocessing_context: null
  max_length: 512
root_path: /home/pl487/coreset-project
data_path: /home/pl487/coreset-project/data/processed
dataset: mnli
dataset_split: train
evaluation: null
loggers:
  tensorboard:
    _target_: energizer.loggers.TensorBoardLogger
    root_dir: ./
    name: tb_logs
    version: null
callbacks:
  timer:
    _target_: energizer.active_learning.callbacks.Timer
  lr_monitor:
    _target_: energizer.callbacks.lr_monitor.LearningRateMonitor
  model_checkpoint:
    _target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
    dirpath: .checkpoints
    stage: train
    frequency: 1:epoch
user:
  id: pl487
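The `_target_` fields follow Hydra's instantiation convention: each block names a class that is constructed with the remaining keys as keyword arguments. A minimal sketch of that mechanism (assuming `hydra`, `omegaconf`, and the `energizer` package are installed; whether the project instantiates these blocks exactly this way is not shown in this commit):

```python
from omegaconf import OmegaConf
from hydra.utils import instantiate

# Load the hyperparameter file above and build the logger/callback objects
# from their _target_ entries. Requires the energizer package to be importable.
cfg = OmegaConf.load("hparams.yaml")

loggers = [instantiate(lg) for lg in cfg.loggers.values()]      # TensorBoardLogger(root_dir="./", name="tb_logs", ...)
callbacks = [instantiate(cb) for cb in cfg.callbacks.values()]  # Timer, LearningRateMonitor, ModelCheckpoint
```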
main.log
ADDED
@@ -0,0 +1,140 @@
[2024-04-23 15:11:06,594][hydra][INFO] -
experiment_group: training
run_name: roberta-base_2024-04-23T15-11-06
seed: 42
model:
  name: roberta-base
  revision: null
  seed: 42
  base_model: roberta-base
estimator:
  accelerator: gpu
  precision: bf16-true
  deterministic: true
  tf32_mode: high
  convert_to_bettertransformer: false
fit:
  max_epochs: 20
  min_epochs: null
  optimizer_kwargs:
    name: adamw
    lr: 3.0e-05
    init_kwargs:
      fused: true
  scheduler_kwargs:
    name: constant_schedule_with_warmup
    num_warmup_steps: 2000
  log_interval: 100
  enable_progress_bar: true
  limit_train_batches: null
data:
  batch_size: 32
  eval_batch_size: 128
  shuffle: true
  replacement: false
  data_seed: 42
  drop_last: false
  num_workers: 8
  pin_memory: true
  persistent_workers: false
  multiprocessing_context: null
  max_length: 512
root_path: /home/pl487/coreset-project
data_path: /home/pl487/coreset-project/data/processed
dataset: mnli
dataset_split: train
evaluation: null
loggers:
  tensorboard:
    _target_: energizer.loggers.TensorBoardLogger
    root_dir: ./
    name: tb_logs
    version: null
callbacks:
  timer:
    _target_: energizer.active_learning.callbacks.Timer
  lr_monitor:
    _target_: energizer.callbacks.lr_monitor.LearningRateMonitor
  model_checkpoint:
    _target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
    dirpath: .checkpoints
    stage: train
    frequency: 1:epoch
user:
  id: pl487

======================================================================
[2024-04-23 15:11:06,595][hydra][INFO] - Seed enabled: 42
[2024-04-23 15:11:06,963][hydra][INFO] - Label distribution:
{<RunningStage.TRAIN: 'train'>: {'0-(entailment)': 130899, '1-(neutral)': 130900, '2-(contradiction)': 130903}}
[2024-04-23 15:11:19,109][hydra][INFO] - Loggers: [<energizer.loggers.tensorboard.TensorBoardLogger object at 0x7f86f05beb00>]
[2024-04-23 15:11:19,110][hydra][INFO] - Callbacks: [<energizer.active_learning.callbacks.Timer object at 0x7f86deae60b0>, <energizer.callbacks.lr_monitor.LearningRateMonitor object at 0x7f86deae6110>, <energizer.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f86deae6620>]
[2024-04-23 15:11:19,113][hydra][INFO] - Model summary:
Total num params: 124.6M
Of which trainable: 124.6M
With a memory footprint of 0.25GB
Total memory allocated 0.77GB
[2024-04-23 15:11:19,754][hydra][INFO] - Dataloading params:
SequenceClassificationDataloaderArgs(batch_size=32, eval_batch_size=128, num_workers=8, pin_memory=True, drop_last=False, persistent_workers=False, shuffle=True, replacement=False, data_seed=42, multiprocessing_context=None, max_length=512)
[2024-04-23 15:11:19,760][hydra][INFO] - Batch:
{<InputKeys.INPUT_IDS: 'input_ids'>: tensor([[ 0, 3056, 37463, 14, 18, 761, 9, 4678, 939, 1266,
        51, 214, 190, 3406, 24, 7, 7, 147, 122, 37463,
        14, 51, 26112, 15, 1012, 47, 216, 114, 110, 114,
        47, 37463, 47, 216, 33, 626, 42, 50, 114, 47,
        240, 42, 37463, 37463, 52, 581, 14811, 13, 47, 8,
        47, 218, 75, 33, 7, 582, 201, 3867, 47, 53,
        172, 99, 51, 218, 75, 1137, 47, 16, 14, 114,
        47, 114, 51, 339, 47, 492, 106, 23, 513, 10,
        371, 9, 5, 9, 5, 631, 14, 51, 339, 98,
        1437, 939, 218, 75, 216, 24, 16, 37463, 24, 18,
        562, 7, 28, 55, 265, 122, 1195, 87, 37463, 888,
        37463, 4098, 19, 5, 1846, 87, 19, 37463, 7252, 5,
        37463, 8653, 51, 5, 5, 3969, 32, 95, 11, 24,
        13, 5, 418, 1437, 939, 437, 939, 437, 7013, 939,
        216, 939, 939, 2854, 19, 47, 939, 206, 47, 214,
        588, 47, 214, 182, 235, 14, 5, 3770, 197, 939,
        206, 51, 197, 33, 41, 3871, 1280, 9, 47, 216,
        2085, 51, 64, 33, 10, 367, 53, 939, 206, 144,
        9, 106, 197, 28, 45, 37463, 3969, 11, 5, 754,
        8, 14, 18, 169, 169, 51, 348, 5335, 88, 2302,
        24, 18, 142, 9, 5, 5, 488, 383, 47, 216,
        5, 37482, 8, 960, 53, 37463, 53, 117, 939, 216,
        52, 24, 7252, 52, 214, 11, 4788, 8, 37463, 52,
        33, 5, 276, 631, 22002, 154, 8, 8, 37463, 51,
        32, 6901, 106, 66, 939, 1266, 95, 5, 1675, 2878,
        3645, 631, 51, 905, 106, 66, 142, 9, 51, 218,
        75, 33, 143, 317, 7, 489, 7, 342, 106, 98,
        4909, 14, 115, 47, 216, 37463, 3867, 37463, 57, 10,
        538, 2970, 53, 37463, 37463, 190, 5, 181, 6502, 352,
        410, 2682, 939, 1266, 5, 5, 1669, 14, 13585, 452,
        11, 730, 16, 5, 1802, 8, 14051, 14, 189, 28,
        549, 24, 18, 10, 6279, 50, 2196, 50, 3046, 1493,
        7252, 51, 32, 5, 1980, 14, 32, 164, 7, 582,
        8, 51, 32, 5, 65, 14, 32, 164, 7, 6297,
        8, 5, 97, 621, 47, 216, 114, 51, 582, 114,
        51, 114, 51, 37463, 2237, 106, 37463, 8, 37463, 172,
        5, 47, 216, 86, 5, 488, 1239, 81, 37463, 457,
        5, 86, 37463, 51, 1169, 905, 106, 213, 50, 51,
        120, 160, 19, 10, 10, 3645, 142, 51, 348, 56,
        10, 2470, 14, 47, 216, 2653, 14, 14, 51, 3559,
        75, 70, 561, 77, 51, 222, 24, 2, 2, 100,
        206, 14, 89, 197, 28, 41, 3871, 8985, 9, 14218,
        11, 84, 3770, 4, 2]]), <InputKeys.ATT_MASK: 'attention_mask'>: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), <InputKeys.LABELS: 'labels'>: tensor([0]), <InputKeys.ON_CPU: 'on_cpu'>: {<SpecialKeys.ID: 'uid'>: [221950]}}
[2024-04-23 20:57:35,566][hydra][INFO] - Training complete
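The label distribution above implies 130,899 + 130,900 + 130,903 = 392,702 training examples. A quick back-of-the-envelope check of the schedule implied by the config (a sketch only; the trainer's exact step accounting is not shown in this log):

```python
import math

# Figures taken from the label-distribution and config lines above.
num_examples = 130_899 + 130_900 + 130_903  # 392,702 MNLI training examples
batch_size = 32
max_epochs = 20
num_warmup_steps = 2_000

steps_per_epoch = math.ceil(num_examples / batch_size)  # drop_last: false -> ceil
total_steps = steps_per_epoch * max_epochs

print(steps_per_epoch)                 # 12272 optimisation steps per epoch
print(total_steps)                     # 245440 steps over 20 epochs
print(num_warmup_steps / total_steps)  # warmup covers roughly 0.8% of training
```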
tb_logs/version_0/events.out.tfevents.1713881489.dev-gpu-pl487.1865581.0
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ce788d1e3351858236cf312c2923a9c037c088eb82249cdb01fed9520a256f5f
size 1855634
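This entry is a git-LFS pointer, so the actual TensorBoard event file has to be fetched before it can be inspected. A minimal sketch of reading it with TensorBoard's `EventAccumulator` (the repo id below is an assumed placeholder, and the scalar tag names depend on the logger, so they are not recoverable from the pointer itself):

```python
from huggingface_hub import hf_hub_download
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Download the LFS-backed event file from the Hub.
path = hf_hub_download(
    repo_id="pietrolesci/mnli_roberta-base",  # assumed repo id
    filename="tb_logs/version_0/events.out.tfevents.1713881489.dev-gpu-pl487.1865581.0",
)

# Load the event file and list the scalar tags it contains.
ea = EventAccumulator(path)
ea.Reload()
print(ea.Tags()["scalars"])  # e.g. loss / learning-rate curves, depending on what was logged
```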