End of training
Browse files- .hydra/config.yaml +4 -1
- .hydra/hydra.yaml +5 -5
- .hydra/overrides.yaml +1 -1
- README.md +15 -15
- config.json +0 -1
- configuration_measurement_pred.py +0 -2
- logs/events.out.tfevents.1734630919.gail.ist.berkeley.edu.140348.0 +3 -0
- model.safetensors +1 -1
- modeling_code_gen_measurement_pred.py +7 -1
- modeling_measurement_pred.py +19 -17
- sensor_loc_stories.py +2 -0
- sensor_locs_from_token.py +2 -0
- special_tokens_map.json +1 -1
- tokenizer.json +2 -2
- tokenizer_config.json +1 -1
- train.log +1 -2
.hydra/config.yaml
CHANGED
@@ -1,8 +1,11 @@
|
|
1 |
model:
|
2 |
-
dataset_name: redwoodresearch/diamonds-
|
3 |
model_type: codegen
|
4 |
pretrained_model_name: Salesforce/codegen-350M-mono
|
5 |
max_length: 1024
|
|
|
|
|
|
|
6 |
hparams:
|
7 |
learning_rate: 2.0e-05
|
8 |
weight_decay: 0.02
|
|
|
1 |
model:
|
2 |
+
dataset_name: redwoodresearch/diamonds-seed2
|
3 |
model_type: codegen
|
4 |
pretrained_model_name: Salesforce/codegen-350M-mono
|
5 |
max_length: 1024
|
6 |
+
model_config_params:
|
7 |
+
sensor_loc_type: locs_from_token
|
8 |
+
sensor_token: ' omit'
|
9 |
hparams:
|
10 |
learning_rate: 2.0e-05
|
11 |
weight_decay: 0.02
|
.hydra/hydra.yaml
CHANGED
@@ -137,13 +137,13 @@ hydra:
|
|
137 |
hydra:
|
138 |
- hydra.mode=MULTIRUN
|
139 |
task:
|
140 |
-
- model.dataset_name=redwoodresearch/diamonds-
|
141 |
job:
|
142 |
name: train
|
143 |
chdir: null
|
144 |
-
override_dirname: model.dataset_name=redwoodresearch/diamonds-
|
145 |
-
id: '
|
146 |
-
num:
|
147 |
config_name: codegen_diamonds_slurm
|
148 |
env_set: {}
|
149 |
env_copy: []
|
@@ -166,7 +166,7 @@ hydra:
|
|
166 |
- path: ''
|
167 |
schema: structured
|
168 |
provider: schema
|
169 |
-
output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-
|
170 |
choices:
|
171 |
hparams: hparams
|
172 |
model: codegen_diamonds
|
|
|
137 |
hydra:
|
138 |
- hydra.mode=MULTIRUN
|
139 |
task:
|
140 |
+
- model.dataset_name=redwoodresearch/diamonds-seed2
|
141 |
job:
|
142 |
name: train
|
143 |
chdir: null
|
144 |
+
override_dirname: model.dataset_name=redwoodresearch/diamonds-seed2
|
145 |
+
id: '748836_1'
|
146 |
+
num: 1
|
147 |
config_name: codegen_diamonds_slurm
|
148 |
env_set: {}
|
149 |
env_copy: []
|
|
|
166 |
- path: ''
|
167 |
schema: structured
|
168 |
provider: schema
|
169 |
+
output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-19/09-54-27/1
|
170 |
choices:
|
171 |
hparams: hparams
|
172 |
model: codegen_diamonds
|
.hydra/overrides.yaml
CHANGED
@@ -1 +1 @@
|
|
1 |
-
- model.dataset_name=redwoodresearch/diamonds-
|
|
|
1 |
+
- model.dataset_name=redwoodresearch/diamonds-seed2
|
README.md
CHANGED
@@ -17,16 +17,16 @@ should probably proofread and complete it, then remove this comment. -->
|
|
17 |
|
18 |
This model is a fine-tuned version of [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono) on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
-
- Loss: 0.
|
21 |
-
- Accuracy: 0.
|
22 |
-
- Accuracy Sensor 0: 0.
|
23 |
-
- Auroc Sensor 0: 0.
|
24 |
-
- Accuracy Sensor 1: 0.
|
25 |
-
- Auroc Sensor 1: 0.
|
26 |
-
- Accuracy Sensor 2: 0.
|
27 |
-
- Auroc Sensor 2: 0.
|
28 |
-
- Accuracy Aggregated: 0.
|
29 |
-
- Auroc Aggregated: 0.
|
30 |
|
31 |
## Model description
|
32 |
|
@@ -61,11 +61,11 @@ The following hyperparameters were used during training:
|
|
61 |
|
62 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Accuracy Sensor 0 | Auroc Sensor 0 | Accuracy Sensor 1 | Auroc Sensor 1 | Accuracy Sensor 2 | Auroc Sensor 2 | Accuracy Aggregated | Auroc Aggregated |
|
63 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-------------------:|:----------------:|
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
|
70 |
|
71 |
### Framework versions
|
|
|
17 |
|
18 |
This model is a fine-tuned version of [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono) on an unknown dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 0.4189
|
21 |
+
- Accuracy: 0.9210
|
22 |
+
- Accuracy Sensor 0: 0.9298
|
23 |
+
- Auroc Sensor 0: 0.9628
|
24 |
+
- Accuracy Sensor 1: 0.9259
|
25 |
+
- Auroc Sensor 1: 0.9711
|
26 |
+
- Accuracy Sensor 2: 0.9266
|
27 |
+
- Auroc Sensor 2: 0.9619
|
28 |
+
- Accuracy Aggregated: 0.9019
|
29 |
+
- Auroc Aggregated: 0.9592
|
30 |
|
31 |
## Model description
|
32 |
|
|
|
61 |
|
62 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Accuracy Sensor 0 | Auroc Sensor 0 | Accuracy Sensor 1 | Auroc Sensor 1 | Accuracy Sensor 2 | Auroc Sensor 2 | Accuracy Aggregated | Auroc Aggregated |
|
63 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-------------------:|:----------------:|
|
64 |
+
| 0.2961 | 0.9997 | 781 | 0.4800 | 0.7906 | 0.8122 | 0.9078 | 0.7952 | 0.9255 | 0.8160 | 0.9280 | 0.7391 | 0.8990 |
|
65 |
+
| 0.1901 | 1.9994 | 1562 | 0.3107 | 0.8847 | 0.9115 | 0.9491 | 0.8649 | 0.9604 | 0.8951 | 0.9532 | 0.8674 | 0.9397 |
|
66 |
+
| 0.1154 | 2.9990 | 2343 | 0.3076 | 0.9009 | 0.9154 | 0.9575 | 0.8946 | 0.9656 | 0.9255 | 0.9576 | 0.8682 | 0.9492 |
|
67 |
+
| 0.0708 | 4.0 | 3125 | 0.3162 | 0.9207 | 0.9297 | 0.9621 | 0.9245 | 0.9710 | 0.9285 | 0.9619 | 0.9001 | 0.9587 |
|
68 |
+
| 0.0314 | 4.9984 | 3905 | 0.4189 | 0.9210 | 0.9298 | 0.9628 | 0.9259 | 0.9711 | 0.9266 | 0.9619 | 0.9019 | 0.9592 |
|
69 |
|
70 |
|
71 |
### Framework versions
|
config.json
CHANGED
@@ -48,7 +48,6 @@
|
|
48 |
"tokenizer_class": "GPT2Tokenizer",
|
49 |
"torch_dtype": "float32",
|
50 |
"transformers_version": "4.41.0",
|
51 |
-
"use_aggregated": true,
|
52 |
"use_cache": false,
|
53 |
"vocab_size": 51200
|
54 |
}
|
|
|
48 |
"tokenizer_class": "GPT2Tokenizer",
|
49 |
"torch_dtype": "float32",
|
50 |
"transformers_version": "4.41.0",
|
|
|
51 |
"use_cache": false,
|
52 |
"vocab_size": 51200
|
53 |
}
|
configuration_measurement_pred.py
CHANGED
@@ -7,7 +7,6 @@ class MeasurementPredictorConfig(PretrainedConfig):
|
|
7 |
sensor_token=" omit",
|
8 |
sensor_loc_type="locs_from_token",
|
9 |
n_sensors=3,
|
10 |
-
use_aggregated=True,
|
11 |
sensors_weight = 0.7,
|
12 |
aggregate_weight=0.3,
|
13 |
**kwargs
|
@@ -15,7 +14,6 @@ class MeasurementPredictorConfig(PretrainedConfig):
|
|
15 |
self.sensor_token = sensor_token
|
16 |
self.sensor_loc_type = sensor_loc_type
|
17 |
self.n_sensors = n_sensors
|
18 |
-
self.use_aggregated = use_aggregated
|
19 |
self.sensors_weight = sensors_weight
|
20 |
self.aggregate_weight = aggregate_weight
|
21 |
super().__init__(**kwargs)
|
|
|
7 |
sensor_token=" omit",
|
8 |
sensor_loc_type="locs_from_token",
|
9 |
n_sensors=3,
|
|
|
10 |
sensors_weight = 0.7,
|
11 |
aggregate_weight=0.3,
|
12 |
**kwargs
|
|
|
14 |
self.sensor_token = sensor_token
|
15 |
self.sensor_loc_type = sensor_loc_type
|
16 |
self.n_sensors = n_sensors
|
|
|
17 |
self.sensors_weight = sensors_weight
|
18 |
self.aggregate_weight = aggregate_weight
|
19 |
super().__init__(**kwargs)
|
logs/events.out.tfevents.1734630919.gail.ist.berkeley.edu.140348.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4c6247b038ae42d4509821b4d01319d171d792d10a51710ab6d0134bc370bed
|
3 |
+
size 16043
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1216963976
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6504413594e6bb22cce3c47736265b71f62a4a3d39ab10a3489af264340bce6
|
3 |
size 1216963976
|
modeling_code_gen_measurement_pred.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from transformers.models.codegen import CodeGenPreTrainedModel, CodeGenModel
|
2 |
-
|
3 |
from .modeling_measurement_pred import MeasurementPredictorMixin
|
4 |
from .configuration_code_gen_measuremet_pred import CodeGenMeasurementPredictorConfig
|
5 |
|
@@ -11,3 +11,9 @@ class CodeGenMeasurementPredictor(CodeGenPreTrainedModel, MeasurementPredictorMi
|
|
11 |
super().__init__(config)
|
12 |
self.transformer = CodeGenModel(config)
|
13 |
self.post_init()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from transformers.models.codegen import CodeGenPreTrainedModel, CodeGenModel
|
2 |
+
from transformers import PreTrainedTokenizerBase
|
3 |
from .modeling_measurement_pred import MeasurementPredictorMixin
|
4 |
from .configuration_code_gen_measuremet_pred import CodeGenMeasurementPredictorConfig
|
5 |
|
|
|
11 |
super().__init__(config)
|
12 |
self.transformer = CodeGenModel(config)
|
13 |
self.post_init()
|
14 |
+
|
15 |
+
def set_pad_token(self, tokenizer: PreTrainedTokenizerBase):
|
16 |
+
pad_token = ' .'
|
17 |
+
pad_token_id = tokenizer.encode(pad_token)[0]
|
18 |
+
tokenizer.pad_token = pad_token
|
19 |
+
tokenizer.pad_token_id = pad_token_id
|
modeling_measurement_pred.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from typing import Optional, Tuple, Union
|
|
|
2 |
|
3 |
import torch
|
4 |
from torch.nn import BCEWithLogitsLoss
|
@@ -20,16 +21,18 @@ class MeasurementPredictorMixin(PreTrainedModel):
|
|
20 |
self.sensor_probes = torch.nn.ModuleList([
|
21 |
torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
|
22 |
])
|
23 |
-
self.
|
24 |
-
if config.use_aggregated:
|
25 |
-
self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
|
26 |
self.sensors_weight = config.sensors_weight
|
27 |
self.aggregate_weight = config.aggregate_weight
|
28 |
|
29 |
-
self.
|
|
|
|
|
|
|
|
|
30 |
|
31 |
def init_sensor_loc_finder(self, tokenizer: PreTrainedTokenizerBase):
|
32 |
-
self.
|
33 |
tokenizer, sensor_token=self.sensor_token, n_sensors=self.n_sensors
|
34 |
)
|
35 |
|
@@ -67,28 +70,27 @@ class MeasurementPredictorMixin(PreTrainedModel):
|
|
67 |
output_hidden_states=output_hidden_states,
|
68 |
return_dict=return_dict,
|
69 |
)
|
70 |
-
|
|
|
71 |
sensor_embs = base_model_output.last_hidden_state.gather(
|
72 |
1, sensor_locs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
|
73 |
)
|
74 |
-
assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors, self.config.emb_dim),
|
|
|
|
|
75 |
sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])
|
76 |
for i in range(self.n_sensors)], dim=-1)
|
77 |
-
|
|
|
78 |
|
79 |
-
|
80 |
-
last_emb = base_model_output.last_hidden_state[:, -1, :]
|
81 |
-
aggregate_logits = self.aggregate_probe(last_emb)
|
82 |
-
logits = torch.concat([logits, aggregate_logits], dim=-1)
|
83 |
-
|
84 |
loss = None
|
85 |
if labels is not None:
|
86 |
loss_fct = BCEWithLogitsLoss()
|
87 |
-
sensor_loss = loss_fct(sensor_logits, labels[:, :self.n_sensors]) * self.sensors_weight
|
88 |
loss = sensor_loss
|
89 |
-
|
90 |
-
|
91 |
-
loss += aggregate_loss
|
92 |
|
93 |
if not return_dict:
|
94 |
output = (logits, ) + base_model_output[1:]
|
|
|
1 |
from typing import Optional, Tuple, Union
|
2 |
+
from abc import abstractmethod
|
3 |
|
4 |
import torch
|
5 |
from torch.nn import BCEWithLogitsLoss
|
|
|
21 |
self.sensor_probes = torch.nn.ModuleList([
|
22 |
torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
|
23 |
])
|
24 |
+
self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
|
|
|
|
|
25 |
self.sensors_weight = config.sensors_weight
|
26 |
self.aggregate_weight = config.aggregate_weight
|
27 |
|
28 |
+
self.find_sensor_locs: SensorLocFinder = None
|
29 |
+
|
30 |
+
@abstractmethod
|
31 |
+
def set_pad_token(self, tokenizer: PreTrainedTokenizerBase):
|
32 |
+
pass
|
33 |
|
34 |
def init_sensor_loc_finder(self, tokenizer: PreTrainedTokenizerBase):
|
35 |
+
self.find_sensor_locs = SENSOR_LOC_REGISTRY[self.sensor_loc_type](
|
36 |
tokenizer, sensor_token=self.sensor_token, n_sensors=self.n_sensors
|
37 |
)
|
38 |
|
|
|
70 |
output_hidden_states=output_hidden_states,
|
71 |
return_dict=return_dict,
|
72 |
)
|
73 |
+
# get sensor embeddings (including aggregate)
|
74 |
+
sensor_locs = self.find_sensor_locs(input_ids)
|
75 |
sensor_embs = base_model_output.last_hidden_state.gather(
|
76 |
1, sensor_locs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
|
77 |
)
|
78 |
+
assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors + 1, self.config.emb_dim), sensor_embs.shape
|
79 |
+
|
80 |
+
# get sensor and aggregate logits
|
81 |
sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])
|
82 |
for i in range(self.n_sensors)], dim=-1)
|
83 |
+
aggregate_logits = self.aggregate_probe(sensor_embs[:, -1, :])
|
84 |
+
logits = torch.concat([sensor_logits, aggregate_logits], dim=-1)
|
85 |
|
86 |
+
# compute loss
|
|
|
|
|
|
|
|
|
87 |
loss = None
|
88 |
if labels is not None:
|
89 |
loss_fct = BCEWithLogitsLoss()
|
90 |
+
sensor_loss = loss_fct(sensor_logits[:, :self.n_sensors], labels[:, :self.n_sensors]) * self.sensors_weight
|
91 |
loss = sensor_loss
|
92 |
+
aggregate_loss = loss_fct(aggregate_logits, labels[:, -1:]) * self.aggregate_weight
|
93 |
+
loss += aggregate_loss
|
|
|
94 |
|
95 |
if not return_dict:
|
96 |
output = (logits, ) + base_model_output[1:]
|
sensor_loc_stories.py
CHANGED
@@ -26,6 +26,8 @@ class StoriesSensorLocFinder(SensorLocFinder):
|
|
26 |
torch.argmax(eqs.to(torch.uint8), dim=-2),
|
27 |
input_ids.shape[-1] - 3,
|
28 |
).clamp(max=input_ids.shape[-1] - 3)
|
|
|
|
|
29 |
return locs
|
30 |
|
31 |
|
|
|
26 |
torch.argmax(eqs.to(torch.uint8), dim=-2),
|
27 |
input_ids.shape[-1] - 3,
|
28 |
).clamp(max=input_ids.shape[-1] - 3)
|
29 |
+
aggregate_sensor_loc = locs[:, -1].unsqueeze(1)
|
30 |
+
locs = torch.cat([locs, aggregate_sensor_loc], dim=1)
|
31 |
return locs
|
32 |
|
33 |
|
sensor_locs_from_token.py
CHANGED
@@ -13,4 +13,6 @@ class SensorLocFinderFromToken(SensorLocFinder):
|
|
13 |
def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
|
14 |
flat_sensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
|
15 |
sensor_token_idxs = flat_sensor_token_idxs.view(-1, self.n_sensors)
|
|
|
|
|
16 |
return sensor_token_idxs
|
|
|
13 |
def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
|
14 |
flat_sensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
|
15 |
sensor_token_idxs = flat_sensor_token_idxs.view(-1, self.n_sensors)
|
16 |
+
aggregate_sensor_token_idx = sensor_token_idxs[:, -1].unsqueeze(1)
|
17 |
+
sensor_token_idxs = torch.cat([sensor_token_idxs, aggregate_sensor_token_idx], dim=1)
|
18 |
return sensor_token_idxs
|
special_tokens_map.json
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
-
"pad_token": "
|
17 |
"unk_token": {
|
18 |
"content": "<|endoftext|>",
|
19 |
"lstrip": false,
|
|
|
13 |
"rstrip": false,
|
14 |
"single_word": false
|
15 |
},
|
16 |
+
"pad_token": "Ġ.",
|
17 |
"unk_token": {
|
18 |
"content": "<|endoftext|>",
|
19 |
"lstrip": false,
|
tokenizer.json
CHANGED
@@ -12,9 +12,9 @@
|
|
12 |
},
|
13 |
"direction": "Left",
|
14 |
"pad_to_multiple_of": null,
|
15 |
-
"pad_id":
|
16 |
"pad_type_id": 0,
|
17 |
-
"pad_token": "
|
18 |
},
|
19 |
"added_tokens": [
|
20 |
{
|
|
|
12 |
},
|
13 |
"direction": "Left",
|
14 |
"pad_to_multiple_of": null,
|
15 |
+
"pad_id": 764,
|
16 |
"pad_type_id": 0,
|
17 |
+
"pad_token": "Ġ."
|
18 |
},
|
19 |
"added_tokens": [
|
20 |
{
|
tokenizer_config.json
CHANGED
@@ -318,7 +318,7 @@
|
|
318 |
"clean_up_tokenization_spaces": true,
|
319 |
"eos_token": "<|endoftext|>",
|
320 |
"model_max_length": 2048,
|
321 |
-
"pad_token": "
|
322 |
"padding_side": "left",
|
323 |
"return_token_type_ids": false,
|
324 |
"tokenizer_class": "CodeGenTokenizer",
|
|
|
318 |
"clean_up_tokenization_spaces": true,
|
319 |
"eos_token": "<|endoftext|>",
|
320 |
"model_max_length": 2048,
|
321 |
+
"pad_token": "Ġ.",
|
322 |
"padding_side": "left",
|
323 |
"return_token_type_ids": false,
|
324 |
"tokenizer_class": "CodeGenTokenizer",
|
train.log
CHANGED
@@ -1,2 +1 @@
|
|
1 |
-
[2024-12-
|
2 |
-
[2024-12-17 07:27:38,922][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
|
|
|
1 |
+
[2024-12-19 09:55:18,350][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
|
|