Fidlobabovic commited on
Commit
c0fe1f4
1 Parent(s): cc824e0

Upload 12 files

Browse files
adapter_config.json CHANGED
@@ -1,17 +1,27 @@
1
  {
 
2
  "auto_mapping": null,
3
- "base_model_name_or_path": null,
 
 
4
  "inference_mode": true,
5
- "num_attention_heads": 12,
6
- "num_layers": 12,
7
- "num_transformer_submodules": 1,
8
- "num_virtual_tokens": 20,
9
- "peft_type": "PROMPT_TUNING",
10
- "prompt_tuning_init": "TEXT",
11
- "prompt_tuning_init_text": "History : {user_pusrchases} Candidates for recommendations {candidates} Reccomendadion: {output predict}",
 
 
 
 
 
12
  "revision": null,
 
 
 
 
13
  "task_type": "SEQ_2_SEQ_LM",
14
- "token_dim": 768,
15
- "tokenizer_kwargs": null,
16
- "tokenizer_name_or_path": "t5-base"
17
  }
 
1
  {
2
+ "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "t5-base",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
  "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 32,
13
+ "lora_dropout": 0.05,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 16,
19
+ "rank_pattern": {},
20
  "revision": null,
21
+ "target_modules": [
22
+ "q",
23
+ "v"
24
+ ],
25
  "task_type": "SEQ_2_SEQ_LM",
26
+ "use_rslora": false
 
 
27
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43866d6957f976b1e840afcacd36624c650893178c4087ea720677759d013bcd
3
- size 61560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c3f097205552429368c45aaf8a706fcc25c4e8a1f56022779394c0d5584342
3
+ size 7098016
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:927c69c73471df9cb2149c072d27a940cee974c1f4a98f41d4f25577a2ad3bc2
3
- size 125540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66310fe24e2c2135ec5682f73bf2af1b494d686dded923489301df77b614ceea
3
+ size 14241722
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aae7ddb825e2108f386c7feedce97a56a7a0540c59364fe9fed05f46be7c1af
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83ebbc412940984c9cce0d8d151956673a404462d05157cc5a5af4b26ece0e8
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2999ba43e6e7aa2277ff0ab7ea263cb4d77e5ea9d4f3e2d843f093f3849599d6
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e803851e932423d43f5c60c4ef2a6a6cd13c22b0ec6bc5cb6653edd74b0a5e2
3
  size 1000
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 3,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 5,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -930,7 +930,7 @@
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
- "model_max_length": 512,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
 
930
  "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
933
+ "model_max_length": 1000000000000000019884624838656,
934
  "pad_token": "<pad>",
935
  "tokenizer_class": "T5Tokenizer",
936
  "unk_token": "<unk>"
trainer_state.json CHANGED
@@ -1,96 +1,149 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.6,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.6,
13
- "learning_rate": 0.0009337068160597573,
14
- "loss": 11.895,
 
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_gen_len": 19.0,
20
- "eval_loss": 12.336615562438965,
21
- "eval_rouge1": 0.14302579365079365,
22
- "eval_rouge2": 0.015,
23
- "eval_rougeL": 0.14075396825396827,
24
- "eval_rougeLsum": 0.14339285714285716,
25
- "eval_runtime": 496.3231,
26
- "eval_samples_per_second": 8.059,
27
- "eval_steps_per_second": 1.007,
28
- "step": 833
29
  },
30
  {
31
- "epoch": 1.2,
32
- "learning_rate": 0.0008670134720554889,
33
- "loss": 11.6266,
 
34
  "step": 1000
35
  },
36
  {
37
- "epoch": 1.8,
38
- "learning_rate": 0.0008003201280512204,
39
- "loss": 10.0972,
 
40
  "step": 1500
41
  },
42
  {
43
  "epoch": 2.0,
44
- "eval_gen_len": 3.70375,
45
- "eval_loss": 8.050804138183594,
46
- "eval_rouge1": 0.1910912698412698,
47
- "eval_rouge2": 0.0,
48
- "eval_rougeL": 0.18945436507936506,
49
- "eval_rougeLsum": 0.19415674603174599,
50
- "eval_runtime": 427.3914,
51
- "eval_samples_per_second": 9.359,
52
- "eval_steps_per_second": 1.17,
53
- "step": 1666
54
  },
55
  {
56
- "epoch": 2.4,
57
- "learning_rate": 0.0007336267840469522,
58
- "loss": 7.7492,
 
59
  "step": 2000
60
  },
61
  {
62
  "epoch": 3.0,
63
- "learning_rate": 0.0006669334400426837,
64
- "loss": 7.4823,
65
- "step": 2500
 
 
 
 
 
 
 
66
  },
67
  {
68
- "epoch": 3.0,
69
- "eval_gen_len": 2.8205,
70
- "eval_loss": 8.026236534118652,
71
- "eval_rouge1": 0.14571428571428569,
72
- "eval_rouge2": 0.0,
73
- "eval_rougeL": 0.1469047619047619,
74
- "eval_rougeLsum": 0.14785714285714283,
75
- "eval_runtime": 385.1352,
76
- "eval_samples_per_second": 10.386,
77
- "eval_steps_per_second": 1.298,
78
  "step": 2500
79
  },
80
  {
81
- "epoch": 3.6,
82
- "learning_rate": 0.0006002400960384153,
83
- "loss": 7.4336,
 
84
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 500,
88
- "max_steps": 5831,
89
  "num_input_tokens_seen": 0,
90
  "num_train_epochs": 7,
91
  "save_steps": 500,
92
- "total_flos": 6.6354520522752e+16,
93
- "train_batch_size": 8,
94
  "trial_name": null,
95
  "trial_params": null
96
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.0,
5
  "eval_steps": 500,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.67,
13
+ "grad_norm": 10.763190269470215,
14
+ "learning_rate": 3.335112059765208e-05,
15
+ "loss": 1.5382,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_gen_len": 4.8812,
21
+ "eval_loss": 1.1502126455307007,
22
+ "eval_rouge1": 60.247880952380996,
23
+ "eval_rouge2": 42.58590476190485,
24
+ "eval_rougeL": 60.23339682539681,
25
+ "eval_rougeLsum": 60.23446031746034,
26
+ "eval_runtime": 1016.7664,
27
+ "eval_samples_per_second": 4.918,
28
+ "eval_steps_per_second": 1.229,
29
+ "step": 750
30
  },
31
  {
32
+ "epoch": 1.33,
33
+ "grad_norm": 4.3876237869262695,
34
+ "learning_rate": 6.670224119530416e-06,
35
+ "loss": 1.2867,
36
  "step": 1000
37
  },
38
  {
39
+ "epoch": 2.0,
40
+ "grad_norm": 5.068687915802002,
41
+ "learning_rate": 0.0,
42
+ "loss": 1.1906,
43
  "step": 1500
44
  },
45
  {
46
  "epoch": 2.0,
47
+ "eval_gen_len": 4.9042,
48
+ "eval_loss": 1.1154725551605225,
49
+ "eval_rouge1": 60.75021428571432,
50
+ "eval_rouge2": 43.73785714285723,
51
+ "eval_rougeL": 60.75853968253971,
52
+ "eval_rougeLsum": 60.7197619047618,
53
+ "eval_runtime": 1019.2932,
54
+ "eval_samples_per_second": 4.905,
55
+ "eval_steps_per_second": 1.226,
56
+ "step": 1500
57
  },
58
  {
59
+ "epoch": 2.67,
60
+ "grad_norm": 6.656228065490723,
61
+ "learning_rate": 0.0,
62
+ "loss": 1.2365,
63
  "step": 2000
64
  },
65
  {
66
  "epoch": 3.0,
67
+ "eval_gen_len": 4.9042,
68
+ "eval_loss": 1.1154857873916626,
69
+ "eval_rouge1": 60.75021428571432,
70
+ "eval_rouge2": 43.73785714285723,
71
+ "eval_rougeL": 60.75853968253971,
72
+ "eval_rougeLsum": 60.7197619047618,
73
+ "eval_runtime": 1019.9367,
74
+ "eval_samples_per_second": 4.902,
75
+ "eval_steps_per_second": 1.226,
76
+ "step": 2250
77
  },
78
  {
79
+ "epoch": 3.33,
80
+ "grad_norm": 8.076435089111328,
81
+ "learning_rate": 0.0,
82
+ "loss": 1.2041,
 
 
 
 
 
 
83
  "step": 2500
84
  },
85
  {
86
+ "epoch": 4.0,
87
+ "grad_norm": 3.218427896499634,
88
+ "learning_rate": 0.0,
89
+ "loss": 1.1962,
90
  "step": 3000
91
+ },
92
+ {
93
+ "epoch": 4.0,
94
+ "eval_gen_len": 4.9042,
95
+ "eval_loss": 1.1154814958572388,
96
+ "eval_rouge1": 60.75021428571432,
97
+ "eval_rouge2": 43.73785714285723,
98
+ "eval_rougeL": 60.75853968253971,
99
+ "eval_rougeLsum": 60.7197619047618,
100
+ "eval_runtime": 1019.891,
101
+ "eval_samples_per_second": 4.902,
102
+ "eval_steps_per_second": 1.226,
103
+ "step": 3000
104
+ },
105
+ {
106
+ "epoch": 4.67,
107
+ "grad_norm": 3.1441781520843506,
108
+ "learning_rate": 0.0,
109
+ "loss": 1.2323,
110
+ "step": 3500
111
+ },
112
+ {
113
+ "epoch": 5.0,
114
+ "eval_gen_len": 4.9042,
115
+ "eval_loss": 1.1154634952545166,
116
+ "eval_rouge1": 60.75021428571432,
117
+ "eval_rouge2": 43.73785714285723,
118
+ "eval_rougeL": 60.75853968253971,
119
+ "eval_rougeLsum": 60.7197619047618,
120
+ "eval_runtime": 1016.3103,
121
+ "eval_samples_per_second": 4.92,
122
+ "eval_steps_per_second": 1.23,
123
+ "step": 3750
124
+ },
125
+ {
126
+ "epoch": 5.33,
127
+ "grad_norm": 4.466716766357422,
128
+ "learning_rate": 0.0,
129
+ "loss": 1.2174,
130
+ "step": 4000
131
+ },
132
+ {
133
+ "epoch": 6.0,
134
+ "grad_norm": 5.4029083251953125,
135
+ "learning_rate": 0.0,
136
+ "loss": 1.1927,
137
+ "step": 4500
138
  }
139
  ],
140
  "logging_steps": 500,
141
+ "max_steps": 5250,
142
  "num_input_tokens_seen": 0,
143
  "num_train_epochs": 7,
144
  "save_steps": 500,
145
+ "total_flos": 1.1059086753792e+16,
146
+ "train_batch_size": 4,
147
  "trial_name": null,
148
  "trial_params": null
149
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ced3c5c6321ce7569e9d56970213deab34781599a975056366d424ad8d88f948
3
- size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46df985f32c9133404178778d30698cb855d173a08623ee3e2588b465a4db162
3
+ size 5048