oliverdk committed on
Commit a66b84c
1 Parent(s): ba25241

End of training

.hydra/config.yaml CHANGED
@@ -1,8 +1,11 @@
 model:
-  dataset_name: redwoodresearch/diamonds-seed1
+  dataset_name: redwoodresearch/diamonds-seed2
   model_type: codegen
   pretrained_model_name: Salesforce/codegen-350M-mono
   max_length: 1024
+  model_config_params:
+    sensor_loc_type: locs_from_token
+    sensor_token: ' omit'
 hparams:
   learning_rate: 2.0e-05
   weight_decay: 0.02
.hydra/hydra.yaml CHANGED
@@ -137,13 +137,13 @@ hydra:
     hydra:
     - hydra.mode=MULTIRUN
     task:
-    - model.dataset_name=redwoodresearch/diamonds-seed1
+    - model.dataset_name=redwoodresearch/diamonds-seed2
   job:
     name: train
     chdir: null
-    override_dirname: model.dataset_name=redwoodresearch/diamonds-seed1
-    id: '747438'
-    num: 0
+    override_dirname: model.dataset_name=redwoodresearch/diamonds-seed2
+    id: '748836_1'
+    num: 1
     config_name: codegen_diamonds_slurm
     env_set: {}
     env_copy: []
@@ -166,7 +166,7 @@ hydra:
     - path: ''
       schema: structured
      provider: schema
-    output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-17/07-26-22/0
+    output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-19/09-54-27/1
    choices:
      hparams: hparams
      model: codegen_diamonds
.hydra/overrides.yaml CHANGED
@@ -1 +1 @@
-- model.dataset_name=redwoodresearch/diamonds-seed1
+- model.dataset_name=redwoodresearch/diamonds-seed2
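For reference, the recorded override can be reproduced with Hydra's compose API. A minimal sketch; the `config_path` is an assumption, while the config name and override are taken from the `.hydra` files above:

```python
# Sketch only: reproduce the recorded override with Hydra's compose API.
# config_path="configs" is a guess; config_name and the override come from
# .hydra/hydra.yaml and .hydra/overrides.yaml above.
from hydra import compose, initialize

with initialize(config_path="configs", version_base=None):
    cfg = compose(
        config_name="codegen_diamonds_slurm",
        overrides=["model.dataset_name=redwoodresearch/diamonds-seed2"],
    )
print(cfg.model.dataset_name)  # redwoodresearch/diamonds-seed2
```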
README.md CHANGED
@@ -17,16 +17,16 @@ should probably proofread and complete it, then remove this comment. -->

 This model is a fine-tuned version of [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4023
-- Accuracy: 0.9108
-- Accuracy Sensor 0: 0.9220
-- Auroc Sensor 0: 0.9580
-- Accuracy Sensor 1: 0.9109
-- Auroc Sensor 1: 0.9645
-- Accuracy Sensor 2: 0.9260
-- Auroc Sensor 2: 0.9611
-- Accuracy Aggregated: 0.8845
-- Auroc Aggregated: 0.9532
+- Loss: 0.4189
+- Accuracy: 0.9210
+- Accuracy Sensor 0: 0.9298
+- Auroc Sensor 0: 0.9628
+- Accuracy Sensor 1: 0.9259
+- Auroc Sensor 1: 0.9711
+- Accuracy Sensor 2: 0.9266
+- Auroc Sensor 2: 0.9619
+- Accuracy Aggregated: 0.9019
+- Auroc Aggregated: 0.9592

 ## Model description

@@ -61,11 +61,11 @@ The following hyperparameters were used during training:

 | Training Loss | Epoch | Step | Validation Loss | Accuracy | Accuracy Sensor 0 | Auroc Sensor 0 | Accuracy Sensor 1 | Auroc Sensor 1 | Accuracy Sensor 2 | Auroc Sensor 2 | Accuracy Aggregated | Auroc Aggregated |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-------------------:|:----------------:|
-| 0.3009 | 0.9997 | 781 | 0.4552 | 0.8074 | 0.8220 | 0.9041 | 0.8092 | 0.9255 | 0.8372 | 0.9304 | 0.7610 | 0.9026 |
-| 0.1989 | 1.9994 | 1562 | 0.3633 | 0.8595 | 0.8835 | 0.9425 | 0.8544 | 0.9520 | 0.8757 | 0.9517 | 0.8244 | 0.9351 |
-| 0.1335 | 2.9990 | 2343 | 0.3032 | 0.8924 | 0.8985 | 0.9529 | 0.8877 | 0.9608 | 0.9246 | 0.9573 | 0.8588 | 0.9463 |
-| 0.093 | 4.0 | 3125 | 0.3016 | 0.9138 | 0.9203 | 0.9581 | 0.9131 | 0.9651 | 0.9304 | 0.9609 | 0.8914 | 0.9529 |
-| 0.0432 | 4.9984 | 3905 | 0.4023 | 0.9108 | 0.9220 | 0.9580 | 0.9109 | 0.9645 | 0.9260 | 0.9611 | 0.8845 | 0.9532 |
+| 0.2961 | 0.9997 | 781 | 0.4800 | 0.7906 | 0.8122 | 0.9078 | 0.7952 | 0.9255 | 0.8160 | 0.9280 | 0.7391 | 0.8990 |
+| 0.1901 | 1.9994 | 1562 | 0.3107 | 0.8847 | 0.9115 | 0.9491 | 0.8649 | 0.9604 | 0.8951 | 0.9532 | 0.8674 | 0.9397 |
+| 0.1154 | 2.9990 | 2343 | 0.3076 | 0.9009 | 0.9154 | 0.9575 | 0.8946 | 0.9656 | 0.9255 | 0.9576 | 0.8682 | 0.9492 |
+| 0.0708 | 4.0 | 3125 | 0.3162 | 0.9207 | 0.9297 | 0.9621 | 0.9245 | 0.9710 | 0.9285 | 0.9619 | 0.9001 | 0.9587 |
+| 0.0314 | 4.9984 | 3905 | 0.4189 | 0.9210 | 0.9298 | 0.9628 | 0.9259 | 0.9711 | 0.9266 | 0.9619 | 0.9019 | 0.9592 |


 ### Framework versions
config.json CHANGED
@@ -48,7 +48,6 @@
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
   "transformers_version": "4.41.0",
-  "use_aggregated": true,
   "use_cache": false,
   "vocab_size": 51200
 }
configuration_measurement_pred.py CHANGED
@@ -7,7 +7,6 @@ class MeasurementPredictorConfig(PretrainedConfig):
         sensor_token=" omit",
         sensor_loc_type="locs_from_token",
         n_sensors=3,
-        use_aggregated=True,
         sensors_weight = 0.7,
         aggregate_weight=0.3,
         **kwargs
@@ -15,7 +14,6 @@ class MeasurementPredictorConfig(PretrainedConfig):
         self.sensor_token = sensor_token
         self.sensor_loc_type = sensor_loc_type
         self.n_sensors = n_sensors
-        self.use_aggregated = use_aggregated
         self.sensors_weight = sensors_weight
         self.aggregate_weight = aggregate_weight
         super().__init__(**kwargs)
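With the aggregate probe now unconditional, `use_aggregated` disappears from the config. A minimal sketch of constructing the config after this commit (the import path assumes the repo root; keyword values mirror the defaults above):

```python
# Sketch: the post-commit config no longer accepts or stores use_aggregated;
# the aggregate probe is always created (see modeling_measurement_pred.py below).
from configuration_measurement_pred import MeasurementPredictorConfig

config = MeasurementPredictorConfig(
    sensor_token=" omit",
    sensor_loc_type="locs_from_token",
    n_sensors=3,
    sensors_weight=0.7,
    aggregate_weight=0.3,
)
assert not hasattr(config, "use_aggregated")
```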
logs/events.out.tfevents.1734630919.gail.ist.berkeley.edu.140348.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c6247b038ae42d4509821b4d01319d171d792d10a51710ab6d0134bc370bed
+size 16043
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57651a18b0a367e6c0ffaa97666d103bcf42708a6d8abab94d3cf7704204ad7d
+oid sha256:c6504413594e6bb22cce3c47736265b71f62a4a3d39ab10a3489af264340bce6
 size 1216963976
modeling_code_gen_measurement_pred.py CHANGED
@@ -1,5 +1,5 @@
 from transformers.models.codegen import CodeGenPreTrainedModel, CodeGenModel
-
+from transformers import PreTrainedTokenizerBase
 from .modeling_measurement_pred import MeasurementPredictorMixin
 from .configuration_code_gen_measuremet_pred import CodeGenMeasurementPredictorConfig

@@ -11,3 +11,9 @@ class CodeGenMeasurementPredictor(CodeGenPreTrainedModel, MeasurementPredictorMixin):
         super().__init__(config)
         self.transformer = CodeGenModel(config)
         self.post_init()
+
+    def set_pad_token(self, tokenizer: PreTrainedTokenizerBase):
+        pad_token = ' .'
+        pad_token_id = tokenizer.encode(pad_token)[0]
+        tokenizer.pad_token = pad_token
+        tokenizer.pad_token_id = pad_token_id
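The new `set_pad_token` hook replaces the `<|endoftext|>` pad token with `' .'`. A usage sketch against the base tokenizer (the loading call is illustrative; `' .'` encodes to id 764, matching `pad_id` in `tokenizer.json` below):

```python
# Sketch: what set_pad_token does to a CodeGen tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-350M-mono")
tokenizer.pad_token = " ."
tokenizer.pad_token_id = tokenizer.encode(" .")[0]
print(tokenizer.pad_token_id)  # 764
```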
modeling_measurement_pred.py CHANGED
@@ -1,4 +1,5 @@
 from typing import Optional, Tuple, Union
+from abc import abstractmethod

 import torch
 from torch.nn import BCEWithLogitsLoss
@@ -20,16 +21,18 @@ class MeasurementPredictorMixin(PreTrainedModel):
         self.sensor_probes = torch.nn.ModuleList([
             torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
         ])
-        self.use_aggregated = config.use_aggregated
-        if config.use_aggregated:
-            self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
+        self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
         self.sensors_weight = config.sensors_weight
         self.aggregate_weight = config.aggregate_weight

-        self.get_sensor_locs: SensorLocFinder = None
+        self.find_sensor_locs: SensorLocFinder = None
+
+    @abstractmethod
+    def set_pad_token(self, tokenizer: PreTrainedTokenizerBase):
+        pass

     def init_sensor_loc_finder(self, tokenizer: PreTrainedTokenizerBase):
-        self.get_sensor_locs = SENSOR_LOC_REGISTRY[self.sensor_loc_type](
+        self.find_sensor_locs = SENSOR_LOC_REGISTRY[self.sensor_loc_type](
             tokenizer, sensor_token=self.sensor_token, n_sensors=self.n_sensors
         )

@@ -67,28 +70,27 @@ class MeasurementPredictorMixin(PreTrainedModel):
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
         )
-        sensor_locs = self.get_sensor_locs(input_ids)
+        # get sensor embeddings (including aggregate)
+        sensor_locs = self.find_sensor_locs(input_ids)
         sensor_embs = base_model_output.last_hidden_state.gather(
             1, sensor_locs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
         )
-        assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors, self.config.emb_dim), f"{sensor_embs.shape} != {(input_ids.shape[0], self.n_sensors, self.config.emb_dim)}"
+        assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors + 1, self.config.emb_dim), sensor_embs.shape
+
+        # get sensor and aggregate logits
         sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])
                                       for i in range(self.n_sensors)], dim=-1)
-        logits = sensor_logits
+        aggregate_logits = self.aggregate_probe(sensor_embs[:, -1, :])
+        logits = torch.concat([sensor_logits, aggregate_logits], dim=-1)

-        if self.use_aggregated:
-            last_emb = base_model_output.last_hidden_state[:, -1, :]
-            aggregate_logits = self.aggregate_probe(last_emb)
-            logits = torch.concat([logits, aggregate_logits], dim=-1)
-
+        # compute loss
         loss = None
         if labels is not None:
             loss_fct = BCEWithLogitsLoss()
-            sensor_loss = loss_fct(sensor_logits, labels[:, :self.n_sensors]) * self.sensors_weight
+            sensor_loss = loss_fct(sensor_logits[:, :self.n_sensors], labels[:, :self.n_sensors]) * self.sensors_weight
             loss = sensor_loss
-            if self.use_aggregated: #TOOD: should be use aggregate
-                aggregate_loss = loss_fct(aggregate_logits, labels[:, -1:]) * self.aggregate_weight
-                loss += aggregate_loss
+            aggregate_loss = loss_fct(aggregate_logits, labels[:, -1:]) * self.aggregate_weight
+            loss += aggregate_loss

         if not return_dict:
             output = (logits, ) + base_model_output[1:]
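The reworked forward pass gathers `n_sensors + 1` hidden states (the extra, duplicated location feeds the aggregate probe) and concatenates per-sensor and aggregate logits. A toy shape check, independent of the real model (all sizes illustrative):

```python
# Toy sketch of the new shape flow in forward(); tensors and sizes are made up.
import torch

batch, seq_len, emb_dim, n_sensors = 2, 10, 8, 3
hidden = torch.randn(batch, seq_len, emb_dim)        # stands in for last_hidden_state
locs = torch.tensor([[3, 5, 7, 7], [2, 4, 6, 6]])    # n_sensors + 1 locations, last duplicated
embs = hidden.gather(1, locs.unsqueeze(-1).expand(-1, -1, emb_dim))
assert embs.shape == (batch, n_sensors + 1, emb_dim)

probes = [torch.nn.Linear(emb_dim, 1) for _ in range(n_sensors)]
aggregate_probe = torch.nn.Linear(emb_dim, 1)
sensor_logits = torch.cat([probes[i](embs[:, i, :]) for i in range(n_sensors)], dim=-1)
logits = torch.cat([sensor_logits, aggregate_probe(embs[:, -1, :])], dim=-1)
assert logits.shape == (batch, n_sensors + 1)
```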
sensor_loc_stories.py CHANGED
@@ -26,6 +26,8 @@ class StoriesSensorLocFinder(SensorLocFinder):
             torch.argmax(eqs.to(torch.uint8), dim=-2),
             input_ids.shape[-1] - 3,
         ).clamp(max=input_ids.shape[-1] - 3)
+        aggregate_sensor_loc = locs[:, -1].unsqueeze(1)
+        locs = torch.cat([locs, aggregate_sensor_loc], dim=1)
         return locs

sensor_locs_from_token.py CHANGED
@@ -13,4 +13,6 @@ class SensorLocFinderFromToken(SensorLocFinder):
     def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
         flat_sensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
         sensor_token_idxs = flat_sensor_token_idxs.view(-1, self.n_sensors)
+        aggregate_sensor_token_idx = sensor_token_idxs[:, -1].unsqueeze(1)
+        sensor_token_idxs = torch.cat([sensor_token_idxs, aggregate_sensor_token_idx], dim=1)
         return sensor_token_idxs
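This change mirrors the one in `sensor_loc_stories.py`: the last sensor-token position is repeated so the aggregate probe reads the same hidden state as the final sensor. A toy run of the updated logic (token ids are made up):

```python
# Sketch of SensorLocFinderFromToken.find_sensor_locs after this commit.
import torch

sensor_token_id, n_sensors = 42, 3           # illustrative values
input_ids = torch.tensor([[1, 42, 7, 42, 9, 42, 0]])
flat = (input_ids == sensor_token_id).nonzero(as_tuple=True)[1]
idxs = flat.view(-1, n_sensors)              # tensor([[1, 3, 5]])
idxs = torch.cat([idxs, idxs[:, -1].unsqueeze(1)], dim=1)
print(idxs)                                  # tensor([[1, 3, 5, 5]])
```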
special_tokens_map.json CHANGED
@@ -13,7 +13,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|endoftext|>",
+  "pad_token": "Ġ.",
   "unk_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -12,9 +12,9 @@
     },
     "direction": "Left",
     "pad_to_multiple_of": null,
-    "pad_id": 50256,
+    "pad_id": 764,
     "pad_type_id": 0,
-    "pad_token": "<|endoftext|>"
+    "pad_token": "Ġ."
   },
   "added_tokens": [
     {
tokenizer_config.json CHANGED
@@ -318,7 +318,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
-  "pad_token": "<|endoftext|>",
+  "pad_token": "Ġ.",
   "padding_side": "left",
   "return_token_type_ids": false,
   "tokenizer_class": "CodeGenTokenizer",
train.log CHANGED
@@ -1,2 +1 @@
-[2024-12-17 07:27:38,728][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
-[2024-12-17 07:27:38,922][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
+[2024-12-19 09:55:18,350][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.