pietrolesci
committed on
Commit
•
629193a
1
Parent(s):
8053734
Upload folder using huggingface_hub
Browse files
- README.md +44 -0
- hparams.yaml +60 -0
- tb_logs/version_0/events.out.tfevents.1709573676.dev-gpu-pl487.42005.0 +3 -0
- tensorboard_logs.parquet +3 -0
- train.log +139 -0
README.md
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Run info
|
2 |
+
- complete_hash: cdc7e895a985daa842292b6780da0d50
|
3 |
+
- short_hash: cdc7ea0d50
|
4 |
+
|
5 |
+
### Configuration
```yaml
|
6 |
+
data:
|
7 |
+
batch_size: 32
|
8 |
+
data_seed: 42
|
9 |
+
drop_last: false
|
10 |
+
eval_batch_size: 128
|
11 |
+
max_length: 512
|
12 |
+
multiprocessing_context: null
|
13 |
+
num_workers: 8
|
14 |
+
persistent_workers: false
|
15 |
+
pin_memory: true
|
16 |
+
replacement: false
|
17 |
+
shuffle: true
|
18 |
+
dataset: mnli
|
19 |
+
estimator:
|
20 |
+
accelerator: gpu
|
21 |
+
convert_to_bettertransformer: false
|
22 |
+
deterministic: true
|
23 |
+
precision: bf16-true
|
24 |
+
tf32_mode: high
|
25 |
+
fit:
|
26 |
+
enable_progress_bar: true
|
27 |
+
limit_train_batches: null
|
28 |
+
limit_validation_batches: null
|
29 |
+
log_interval: 100
|
30 |
+
max_epochs: 20
|
31 |
+
optimizer_kwargs:
|
32 |
+
init_kwargs:
|
33 |
+
fused: true
|
34 |
+
lr: 3.0e-05
|
35 |
+
name: adamw
|
36 |
+
scheduler_kwargs:
|
37 |
+
name: constant_schedule_with_warmup
|
38 |
+
num_warmup_steps: 2000
|
39 |
+
model:
|
40 |
+
name: bert-tiny
|
41 |
+
revision: null
|
42 |
+
seed: 42
|
43 |
+
seed: 42
|
44 |
+
```
|
hparams.yaml
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
estimator:
|
2 |
+
accelerator: gpu
|
3 |
+
precision: bf16-true
|
4 |
+
deterministic: true
|
5 |
+
tf32_mode: high
|
6 |
+
convert_to_bettertransformer: false
|
7 |
+
callbacks:
|
8 |
+
timer:
|
9 |
+
_target_: energizer.active_learning.callbacks.Timer
|
10 |
+
lr_monitor:
|
11 |
+
_target_: energizer.callbacks.lr_monitor.LearningRateMonitor
|
12 |
+
model_checkpoint:
|
13 |
+
_target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
|
14 |
+
dirpath: .checkpoints
|
15 |
+
stage: train
|
16 |
+
frequency: 1:epoch
|
17 |
+
loggers:
|
18 |
+
tensorboard:
|
19 |
+
_target_: energizer.loggers.TensorBoardLogger
|
20 |
+
root_dir: ./
|
21 |
+
name: tb_logs
|
22 |
+
version: null
|
23 |
+
data:
|
24 |
+
batch_size: 32
|
25 |
+
eval_batch_size: 128
|
26 |
+
shuffle: true
|
27 |
+
replacement: false
|
28 |
+
data_seed: 42
|
29 |
+
drop_last: false
|
30 |
+
num_workers: 8
|
31 |
+
pin_memory: true
|
32 |
+
persistent_workers: false
|
33 |
+
multiprocessing_context: null
|
34 |
+
max_length: 512
|
35 |
+
fit:
|
36 |
+
max_epochs: 20
|
37 |
+
optimizer_kwargs:
|
38 |
+
name: adamw
|
39 |
+
lr: 3.0e-05
|
40 |
+
init_kwargs:
|
41 |
+
fused: true
|
42 |
+
scheduler_kwargs:
|
43 |
+
name: constant_schedule_with_warmup
|
44 |
+
num_warmup_steps: 2000
|
45 |
+
log_interval: 100
|
46 |
+
enable_progress_bar: true
|
47 |
+
limit_train_batches: null
|
48 |
+
limit_validation_batches: null
|
49 |
+
model:
|
50 |
+
name: bert-tiny
|
51 |
+
revision: null
|
52 |
+
seed: 42
|
53 |
+
log_interval: 100
|
54 |
+
enable_progress_bar: true
|
55 |
+
limit_batches: null
|
56 |
+
seed: 42
|
57 |
+
experiment_group: training
|
58 |
+
run_name: bert-tiny_2024-03-04T17-34-08
|
59 |
+
data_path: /home/pl487/coreset-project/data/processed
|
60 |
+
dataset: mnli
|
tb_logs/version_0/events.out.tfevents.1709573676.dev-gpu-pl487.42005.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:063115d5e55766e4effd3903de1c8533a552d2d5d4e97c650d2b5f1c65203424
|
3 |
+
size 404506
|
tensorboard_logs.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:863f97419d0825eef7df6ec77877f6b024e95237f8b2435f432c2ef0ed65e80c
|
3 |
+
size 40157
|
train.log
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[2024-03-04 17:34:08,795][hydra][INFO] -
|
2 |
+
estimator:
|
3 |
+
accelerator: gpu
|
4 |
+
precision: bf16-true
|
5 |
+
deterministic: true
|
6 |
+
tf32_mode: high
|
7 |
+
convert_to_bettertransformer: false
|
8 |
+
callbacks:
|
9 |
+
timer:
|
10 |
+
_target_: energizer.active_learning.callbacks.Timer
|
11 |
+
lr_monitor:
|
12 |
+
_target_: energizer.callbacks.lr_monitor.LearningRateMonitor
|
13 |
+
model_checkpoint:
|
14 |
+
_target_: energizer.callbacks.model_checkpoint.ModelCheckpoint
|
15 |
+
dirpath: .checkpoints
|
16 |
+
stage: train
|
17 |
+
frequency: 1:epoch
|
18 |
+
loggers:
|
19 |
+
tensorboard:
|
20 |
+
_target_: energizer.loggers.TensorBoardLogger
|
21 |
+
root_dir: ./
|
22 |
+
name: tb_logs
|
23 |
+
version: null
|
24 |
+
data:
|
25 |
+
batch_size: 32
|
26 |
+
eval_batch_size: 128
|
27 |
+
shuffle: true
|
28 |
+
replacement: false
|
29 |
+
data_seed: 42
|
30 |
+
drop_last: false
|
31 |
+
num_workers: 8
|
32 |
+
pin_memory: true
|
33 |
+
persistent_workers: false
|
34 |
+
multiprocessing_context: null
|
35 |
+
max_length: 512
|
36 |
+
fit:
|
37 |
+
max_epochs: 20
|
38 |
+
optimizer_kwargs:
|
39 |
+
name: adamw
|
40 |
+
lr: 3.0e-05
|
41 |
+
init_kwargs:
|
42 |
+
fused: true
|
43 |
+
scheduler_kwargs:
|
44 |
+
name: constant_schedule_with_warmup
|
45 |
+
num_warmup_steps: 2000
|
46 |
+
log_interval: 100
|
47 |
+
enable_progress_bar: true
|
48 |
+
limit_train_batches: null
|
49 |
+
limit_validation_batches: null
|
50 |
+
model:
|
51 |
+
name: bert-tiny
|
52 |
+
revision: null
|
53 |
+
seed: 42
|
54 |
+
log_interval: 100
|
55 |
+
enable_progress_bar: true
|
56 |
+
limit_batches: null
|
57 |
+
seed: 42
|
58 |
+
experiment_group: training
|
59 |
+
run_name: bert-tiny_2024-03-04T17-34-08
|
60 |
+
data_path: /home/pl487/coreset-project/data/processed
|
61 |
+
dataset: mnli
|
62 |
+
|
63 |
+
======================================================================
|
64 |
+
[2024-03-04 17:34:08,796][hydra][INFO] - Seed enabled: 42
|
65 |
+
[2024-03-04 17:34:09,910][hydra][INFO] - Label distribution:
|
66 |
+
{<RunningStage.TRAIN: 'train'>: {'0-(entailment)': 130899, '1-(neutral)': 130900, '2-(contradiction)': 130903}}
|
67 |
+
[2024-03-04 17:34:21,700][hydra][INFO] - Loggers: [<energizer.loggers.tensorboard.TensorBoardLogger object at 0x7f79509062f0>]
|
68 |
+
[2024-03-04 17:34:21,700][hydra][INFO] - Callbacks: [<energizer.active_learning.callbacks.Timer object at 0x7f792e9ecfd0>, <energizer.callbacks.lr_monitor.LearningRateMonitor object at 0x7f792e9ed030>, <energizer.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f792e9ed540>]
|
69 |
+
[2024-03-04 17:34:21,702][hydra][INFO] - Model summary:
|
70 |
+
Total num params: 4.4M
|
71 |
+
Of which trainable: 4.4M
|
72 |
+
With a memory footprint of 0.01GB
|
73 |
+
Total memory allocated 0.03GB
|
74 |
+
[2024-03-04 17:34:21,702][hydra][INFO] - Dataloading params:
|
75 |
+
SequenceClassificationDataloaderArgs(batch_size=32, eval_batch_size=128, num_workers=8, pin_memory=True, drop_last=False, persistent_workers=False, shuffle=True, replacement=False, data_seed=42, multiprocessing_context=None, max_length=512)
|
76 |
+
[2024-03-04 17:34:21,737][hydra][INFO] - Batch:
|
77 |
+
{<InputKeys.INPUT_IDS: 'input_ids'>: tensor([[ 101, 2092, 7910, 2008, 1005, 1055, 2785, 1997, 5793, 1045,
|
78 |
+
2812, 2027, 1005, 2128, 2130, 4755, 2009, 2000, 2000, 2073,
|
79 |
+
2085, 7910, 2008, 2027, 4748, 16874, 5562, 2006, 2694, 2017,
|
80 |
+
2113, 2065, 2115, 2065, 2017, 7910, 2017, 2113, 2031, 2589,
|
81 |
+
2023, 2030, 2065, 2017, 2342, 2023, 7910, 7910, 2057, 1005,
|
82 |
+
2222, 9790, 2005, 2017, 1998, 2017, 2123, 1005, 1056, 2031,
|
83 |
+
2000, 3477, 2149, 4983, 2017, 2021, 2059, 2054, 2027, 2123,
|
84 |
+
1005, 1056, 2425, 2017, 2003, 2008, 2065, 2017, 2065, 2027,
|
85 |
+
2663, 2017, 2507, 2068, 2012, 2560, 1037, 2353, 1997, 1996,
|
86 |
+
1997, 1996, 2518, 2008, 2027, 2663, 2061, 1045, 2123, 1005,
|
87 |
+
1056, 2113, 2009, 2003, 7910, 2009, 1005, 1055, 2893, 2000,
|
88 |
+
2022, 2062, 2449, 2085, 2738, 2084, 7910, 2941, 7910, 7149,
|
89 |
+
2007, 1996, 4126, 2084, 2007, 7910, 8529, 1996, 7910, 7750,
|
90 |
+
2027, 1996, 1996, 9559, 2024, 2074, 1999, 2009, 2005, 1996,
|
91 |
+
2769, 1045, 1005, 1049, 1045, 1005, 1049, 6427, 1045, 2113,
|
92 |
+
1045, 1045, 5993, 2007, 2017, 1045, 2228, 2017, 1005, 2128,
|
93 |
+
2613, 2017, 1005, 2128, 2200, 2157, 2008, 1996, 8801, 2323,
|
94 |
+
1045, 2228, 2027, 2323, 2031, 2019, 5020, 3815, 1997, 2017,
|
95 |
+
2113, 2672, 2027, 2064, 2031, 1037, 2261, 2021, 1045, 2228,
|
96 |
+
2087, 1997, 2068, 2323, 2022, 2025, 7910, 9559, 1999, 1996,
|
97 |
+
2755, 1998, 2008, 1005, 1055, 2126, 2126, 2027, 1005, 2310,
|
98 |
+
5407, 2046, 4331, 2009, 1005, 1055, 2138, 1997, 1996, 1996,
|
99 |
+
2375, 2477, 2017, 2113, 1996, 15932, 1998, 2673, 2021, 7910,
|
100 |
+
2021, 2053, 1045, 2113, 2057, 2009, 8529, 2057, 1005, 2128,
|
101 |
+
1999, 5374, 1998, 7910, 2057, 2031, 1996, 2168, 2518, 2058,
|
102 |
+
24375, 4667, 1998, 1998, 7910, 2027, 2024, 5599, 2068, 2041,
|
103 |
+
1045, 2812, 2074, 1996, 3180, 7173, 6251, 2518, 2027, 2292,
|
104 |
+
2068, 2041, 2138, 1997, 2027, 2123, 1005, 1056, 2031, 2151,
|
105 |
+
2173, 2000, 2562, 2000, 2404, 2068, 2061, 8307, 2008, 2071,
|
106 |
+
2017, 2113, 7910, 4983, 7910, 2042, 1037, 2350, 10048, 2021,
|
107 |
+
7910, 7910, 2130, 1996, 14255, 14141, 2135, 2210, 4933, 1045,
|
108 |
+
2812, 1996, 1996, 4364, 2008, 12386, 2651, 1999, 2637, 2003,
|
109 |
+
1996, 6778, 1998, 9444, 2008, 2089, 2022, 3251, 2009, 1005,
|
110 |
+
1055, 1037, 13742, 2030, 5850, 2030, 3649, 2842, 8529, 2027,
|
111 |
+
2024, 1996, 3924, 2008, 2024, 2183, 2000, 3477, 1998, 2027,
|
112 |
+
2024, 1996, 2028, 2008, 2024, 2183, 2000, 9015, 1998, 1996,
|
113 |
+
2060, 2711, 2017, 2113, 2065, 2027, 3477, 2065, 2027, 2065,
|
114 |
+
2027, 7910, 6545, 2068, 7910, 1998, 7910, 2059, 1996, 2017,
|
115 |
+
2113, 2051, 1996, 2375, 3138, 2058, 7910, 2431, 1996, 2051,
|
116 |
+
7910, 2027, 2593, 2292, 2068, 2175, 2030, 2027, 2131, 2125,
|
117 |
+
2007, 1037, 1037, 6251, 2138, 2027, 1005, 2310, 2018, 1037,
|
118 |
+
5160, 2008, 2017, 2113, 5683, 2008, 2008, 2027, 4694, 1005,
|
119 |
+
1056, 2035, 2362, 2043, 2027, 2106, 2009, 102, 1045, 2228,
|
120 |
+
2008, 2045, 2323, 2022, 2019, 5020, 6630, 1997, 15406, 1999,
|
121 |
+
2256, 8801, 1012, 102]]), <InputKeys.ATT_MASK: 'attention_mask'>: tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
122 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
123 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
124 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
125 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
126 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
127 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
128 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
129 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
130 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
131 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
132 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
133 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
134 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
135 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
136 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
137 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
138 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
139 |
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), <InputKeys.LABELS: 'labels'>: tensor([0]), <InputKeys.ON_CPU: 'on_cpu'>: {<SpecialKeys.ID: 'uid'>: [221950]}}
|