sayanbanerjee32 commited on
Commit
a3728b2
1 Parent(s): 4b0b5de

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
  "v_proj",
25
- "up_proj",
26
- "k_proj",
27
  "gate_proj",
28
  "down_proj",
29
- "o_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "v_proj",
 
 
24
  "gate_proj",
25
  "down_proj",
26
+ "q_proj",
27
+ "o_proj",
28
+ "k_proj",
29
+ "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d1f2f5cfc5826a2b2fef3a62b0c9435c2c082d599ec3d97712eb11bec1ab2af
3
  size 35669232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edfb040750ea22cc194598c6f5a6c2e7f554d203f21d0541477961960926719e
3
  size 35669232
image_projector.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67ace4e904ebf5e0524ca78eeb7d9899e1d5f6a4d84135469950b27c791e10a9
3
  size 29379352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ebab6acad7c435a7f377ae18e8625635aba4122c811632caba17a9a0439fba0
3
  size 29379352
lora_weights.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05190f07b7d3f2c882001373a6156ca05d7ee7a5324beddc57959449720c41e6
3
  size 35697862
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a34b2ae4068c112b000ec9c23d72c44e2b0119e44037d1ad8dfc3f42f7e429f
3
  size 35697862
trainer_state.json CHANGED
@@ -1,181 +1,141 @@
1
  {
2
- "epoch": 0.16309412861137,
3
  "global_step": 350,
4
- "max_steps": 2146,
5
- "logging_steps": 25,
6
  "eval_steps": 50,
7
- "save_steps": 25,
8
  "train_batch_size": 8,
9
  "num_train_epochs": 1,
10
  "num_input_tokens_seen": 0,
11
- "total_flos": 1.6047778948703846e+17,
12
  "log_history": [
13
  {
14
- "loss": 3.9545,
15
- "grad_norm": 20.241615295410156,
16
- "learning_rate": 5.8139534883720933e-05,
17
- "epoch": 0.011649580615097856,
18
- "step": 25
19
- },
20
- {
21
- "loss": 0.6469,
22
- "grad_norm": 0.6640440821647644,
23
- "learning_rate": 0.00011627906976744187,
24
- "epoch": 0.023299161230195712,
25
  "step": 50
26
  },
27
  {
28
- "eval_loss": 0.332077294588089,
29
- "eval_runtime": 1616.9669,
30
- "eval_samples_per_second": 0.558,
31
- "eval_steps_per_second": 0.14,
32
- "epoch": 0.023299161230195712,
33
  "step": 50
34
  },
35
  {
36
- "loss": 0.1711,
37
- "grad_norm": 0.0800417810678482,
38
- "learning_rate": 0.0001744186046511628,
39
- "epoch": 0.03494874184529357,
40
- "step": 75
41
- },
42
- {
43
- "loss": 0.4039,
44
- "grad_norm": 0.19737310707569122,
45
- "learning_rate": 0.00023255813953488373,
46
- "epoch": 0.046598322460391424,
47
  "step": 100
48
  },
49
  {
50
- "eval_loss": 0.6975870728492737,
51
- "eval_runtime": 1615.7283,
52
- "eval_samples_per_second": 0.559,
53
- "eval_steps_per_second": 0.14,
54
- "epoch": 0.046598322460391424,
55
  "step": 100
56
  },
57
  {
58
- "loss": 0.2002,
59
- "grad_norm": 0.0352032296359539,
60
- "learning_rate": 0.00029069767441860465,
61
- "epoch": 0.058247903075489285,
62
- "step": 125
63
- },
64
- {
65
- "loss": 0.3939,
66
- "grad_norm": 0.17062194645404816,
67
- "learning_rate": 0.0003488372093023256,
68
- "epoch": 0.06989748369058714,
69
  "step": 150
70
  },
71
  {
72
- "eval_loss": 0.2825596034526825,
73
- "eval_runtime": 1614.1197,
74
- "eval_samples_per_second": 0.559,
75
- "eval_steps_per_second": 0.14,
76
- "epoch": 0.06989748369058714,
77
  "step": 150
78
  },
79
  {
80
- "loss": 0.1607,
81
- "grad_norm": 0.08992636203765869,
82
- "learning_rate": 0.00040697674418604653,
83
- "epoch": 0.081547064305685,
84
- "step": 175
85
- },
86
- {
87
- "loss": 0.3829,
88
- "grad_norm": 0.960233747959137,
89
- "learning_rate": 0.00046511627906976747,
90
- "epoch": 0.09319664492078285,
91
  "step": 200
92
  },
93
  {
94
- "eval_loss": 0.2506251931190491,
95
- "eval_runtime": 1617.641,
96
- "eval_samples_per_second": 0.558,
97
- "eval_steps_per_second": 0.14,
98
- "epoch": 0.09319664492078285,
99
  "step": 200
100
  },
101
  {
102
- "loss": 0.1605,
103
- "grad_norm": 0.04875573888421059,
104
- "learning_rate": 0.0004974106680476438,
105
- "epoch": 0.1048462255358807,
106
- "step": 225
107
- },
108
- {
109
- "loss": 0.3913,
110
- "grad_norm": 0.13655845820903778,
111
- "learning_rate": 0.0004909373381667531,
112
- "epoch": 0.11649580615097857,
113
  "step": 250
114
  },
115
  {
116
- "eval_loss": 0.43965786695480347,
117
- "eval_runtime": 1617.1306,
118
- "eval_samples_per_second": 0.558,
119
- "eval_steps_per_second": 0.14,
120
- "epoch": 0.11649580615097857,
121
  "step": 250
122
  },
123
  {
124
- "loss": 0.1632,
125
- "grad_norm": 0.019430797547101974,
126
- "learning_rate": 0.0004844640082858622,
127
- "epoch": 0.12814538676607642,
128
- "step": 275
129
- },
130
- {
131
- "loss": 0.3724,
132
- "grad_norm": 0.2898576855659485,
133
- "learning_rate": 0.0004779906784049715,
134
- "epoch": 0.13979496738117428,
135
  "step": 300
136
  },
137
  {
138
- "eval_loss": 0.9187588095664978,
139
- "eval_runtime": 1617.9145,
140
- "eval_samples_per_second": 0.558,
141
- "eval_steps_per_second": 0.14,
142
- "epoch": 0.13979496738117428,
143
  "step": 300
144
  },
145
  {
146
- "loss": 0.2034,
147
- "grad_norm": 0.02103794552385807,
148
- "learning_rate": 0.0004715173485240808,
149
- "epoch": 0.15144454799627213,
150
- "step": 325
151
- },
152
- {
153
- "loss": 0.3585,
154
- "grad_norm": 0.12554290890693665,
155
- "learning_rate": 0.0004650440186431901,
156
- "epoch": 0.16309412861137,
157
  "step": 350
158
  },
159
  {
160
- "eval_loss": 0.24577070772647858,
161
- "eval_runtime": 1617.816,
162
- "eval_samples_per_second": 0.558,
163
- "eval_steps_per_second": 0.14,
164
- "epoch": 0.16309412861137,
165
  "step": 350
166
  }
167
  ],
168
- "best_metric": null,
169
- "best_model_checkpoint": null,
170
  "is_local_process_zero": true,
171
  "is_world_process_zero": true,
172
  "is_hyper_param_search": false,
173
  "trial_name": null,
174
  "trial_params": null,
175
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
176
  "TrainerControl": {
177
  "args": {
178
- "should_training_stop": false,
179
  "should_epoch_stop": false,
180
  "should_save": true,
181
  "should_evaluate": false,
 
1
  {
2
+ "epoch": 0.03262034577566522,
3
  "global_step": 350,
4
+ "max_steps": 3000,
5
+ "logging_steps": 50,
6
  "eval_steps": 50,
7
+ "save_steps": 50,
8
  "train_batch_size": 8,
9
  "num_train_epochs": 1,
10
  "num_input_tokens_seen": 0,
11
+ "total_flos": 1.6004947053699072e+17,
12
  "log_history": [
13
  {
14
+ "loss": 11.5092,
15
+ "grad_norm": 27.41529655456543,
16
+ "learning_rate": 4.666666666666667e-05,
17
+ "epoch": 0.004660049396523603,
 
 
 
 
 
 
 
18
  "step": 50
19
  },
20
  {
21
+ "eval_loss": 6.5413079261779785,
22
+ "eval_runtime": 484.9209,
23
+ "eval_samples_per_second": 1.862,
24
+ "eval_steps_per_second": 0.466,
25
+ "epoch": 0.004660049396523603,
26
  "step": 50
27
  },
28
  {
29
+ "loss": 0.907,
30
+ "grad_norm": 0.3267304003238678,
31
+ "learning_rate": 9.999988344964554e-05,
32
+ "epoch": 0.009320098793047207,
 
 
 
 
 
 
 
33
  "step": 100
34
  },
35
  {
36
+ "eval_loss": 0.2963584065437317,
37
+ "eval_runtime": 482.3744,
38
+ "eval_samples_per_second": 1.872,
39
+ "eval_steps_per_second": 0.469,
40
+ "epoch": 0.009320098793047207,
41
  "step": 100
42
  },
43
  {
44
+ "loss": 0.2887,
45
+ "grad_norm": 0.23999030888080597,
46
+ "learning_rate": 9.992123261946325e-05,
47
+ "epoch": 0.01398014818957081,
 
 
 
 
 
 
 
48
  "step": 150
49
  },
50
  {
51
+ "eval_loss": 0.26408717036247253,
52
+ "eval_runtime": 483.0929,
53
+ "eval_samples_per_second": 1.869,
54
+ "eval_steps_per_second": 0.468,
55
+ "epoch": 0.01398014818957081,
56
  "step": 150
57
  },
58
  {
59
+ "loss": 0.273,
60
+ "grad_norm": 0.22597914934158325,
61
+ "learning_rate": 9.96971586146684e-05,
62
+ "epoch": 0.018640197586094413,
 
 
 
 
 
 
 
63
  "step": 200
64
  },
65
  {
66
+ "eval_loss": 0.2530948221683502,
67
+ "eval_runtime": 483.695,
68
+ "eval_samples_per_second": 1.867,
69
+ "eval_steps_per_second": 0.467,
70
+ "epoch": 0.018640197586094413,
71
  "step": 200
72
  },
73
  {
74
+ "loss": 0.2655,
75
+ "grad_norm": 0.2108173966407776,
76
+ "learning_rate": 9.932831417461484e-05,
77
+ "epoch": 0.023300246982618015,
 
 
 
 
 
 
 
78
  "step": 250
79
  },
80
  {
81
+ "eval_loss": 0.2521112263202667,
82
+ "eval_runtime": 484.1571,
83
+ "eval_samples_per_second": 1.865,
84
+ "eval_steps_per_second": 0.467,
85
+ "epoch": 0.023300246982618015,
86
  "step": 250
87
  },
88
  {
89
+ "loss": 0.2635,
90
+ "grad_norm": 0.16866298019886017,
91
+ "learning_rate": 9.881577376254393e-05,
92
+ "epoch": 0.02796029637914162,
 
 
 
 
 
 
 
93
  "step": 300
94
  },
95
  {
96
+ "eval_loss": 0.24954503774642944,
97
+ "eval_runtime": 482.9845,
98
+ "eval_samples_per_second": 1.87,
99
+ "eval_steps_per_second": 0.468,
100
+ "epoch": 0.02796029637914162,
101
  "step": 300
102
  },
103
  {
104
+ "loss": 0.2622,
105
+ "grad_norm": 0.19963641464710236,
106
+ "learning_rate": 9.816103043561648e-05,
107
+ "epoch": 0.03262034577566522,
 
 
 
 
 
 
 
108
  "step": 350
109
  },
110
  {
111
+ "eval_loss": 0.24790766835212708,
112
+ "eval_runtime": 482.7586,
113
+ "eval_samples_per_second": 1.87,
114
+ "eval_steps_per_second": 0.468,
115
+ "epoch": 0.03262034577566522,
116
  "step": 350
117
  }
118
  ],
119
+ "best_metric": 0.24790766835212708,
120
+ "best_model_checkpoint": "./multimodal-phi3_5-mini-instruct-llava_adapter/checkpoint-350",
121
  "is_local_process_zero": true,
122
  "is_world_process_zero": true,
123
  "is_hyper_param_search": false,
124
  "trial_name": null,
125
  "trial_params": null,
126
  "stateful_callbacks": {
127
+ "EarlyStoppingCallback": {
128
+ "args": {
129
+ "early_stopping_patience": 3,
130
+ "early_stopping_threshold": 0.01
131
+ },
132
+ "attributes": {
133
+ "early_stopping_patience_counter": 3
134
+ }
135
+ },
136
  "TrainerControl": {
137
  "args": {
138
+ "should_training_stop": true,
139
  "should_epoch_stop": false,
140
  "should_save": true,
141
  "should_evaluate": false,