lesso committed
Commit ef0ce01
Parent: 47d2270

End of training

Files changed (2):
1. README.md (+10, -15)
2. adapter_model.bin (+1, -1)
README.md CHANGED
@@ -22,7 +22,6 @@ adapter: lora
 base_model: unsloth/Llama-3.2-1B-Instruct
 bf16: false
 chat_template: llama3
-dataset_prepared_path: null
 datasets:
 - data_files:
   - 51bfccbb1c68b256_train_data.json
@@ -41,8 +40,8 @@ deepspeed: null
 early_stopping_patience: null
 eval_max_new_tokens: 128
 eval_table_size: null
-evals_per_epoch: 4
-flash_attention: true
+evals_per_epoch: 1
+flash_attention: false
 fp16: true
 fsdp: null
 fsdp_config: null
@@ -65,7 +64,7 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps: 10
+max_steps: 1000
 micro_batch_size: 1
 mlflow_experiment_name: /tmp/51bfccbb1c68b256_train_data.json
 model_type: AutoModelForCausalLM
@@ -76,7 +75,7 @@ pad_to_sequence_len: true
 resume_from_checkpoint: null
 s2_attention: null
 sample_packing: false
-saves_per_epoch: 4
+saves_per_epoch: 1
 sequence_len: 1024
 strict: false
 tf32: false
@@ -90,7 +89,7 @@ wandb_name: 4d407371-e885-4538-9aa3-e0817fccb84f
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: 4d407371-e885-4538-9aa3-e0817fccb84f
-warmup_steps: 10
+warmup_steps: 0
 weight_decay: 0.0
 xformers_attention: null

@@ -102,7 +101,7 @@ xformers_attention: null

 This model is a fine-tuned version of [unsloth/Llama-3.2-1B-Instruct](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.9398
+- Loss: 0.8004

 ## Model description

@@ -129,18 +128,14 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 4
 - optimizer: Use OptimizerNames.ADAMW_HF with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps: 10
-- training_steps: 10
+- training_steps: 719
 - mixed_precision_training: Native AMP

 ### Training results

-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 1.3831 | 0.0014 | 1 | 1.0150 |
-| 0.8161 | 0.0042 | 3 | 1.0096 |
-| 0.8737 | 0.0083 | 6 | 0.9795 |
-| 0.5585 | 0.0125 | 9 | 0.9398 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.321 | 1.0 | 719 | 0.8004 |


 ### Framework versions
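
A note, not part of the commit itself, on what these changes amount to: the run now trains toward a full epoch with warmup disabled, evaluating and saving once per epoch instead of four times. With total_train_batch_size 4, the reported epoch of 1.0 at step 719 implies roughly 719 × 4 ≈ 2,876 training examples, which would explain why training stopped at step 719 rather than at max_steps: 1000. Below is a minimal sketch of loading the resulting adapter with the PEFT library; "your-username/this-adapter-repo" is a hypothetical placeholder for this repository's id, while the base model name and the 128-token generation budget come from the config above.

```python
# Minimal sketch (not part of this commit): apply the trained LoRA adapter
# to the base model and generate. The repo id below is a hypothetical
# placeholder; substitute this repository's actual id.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "unsloth/Llama-3.2-1B-Instruct"  # from base_model in the config
base = AutoModelForCausalLM.from_pretrained(base_id)
tokenizer = AutoTokenizer.from_pretrained(base_id)

# adapter_model.bin holds only the LoRA deltas (lora_r: 8); the base stays frozen.
model = PeftModel.from_pretrained(base, "your-username/this-adapter-repo")

inputs = tokenizer("Hello", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)  # eval_max_new_tokens
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```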
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cd3152f857d7036d180982c9b7705ea4d2a1029b8997071ce41cc3877da2701
+oid sha256:7edb84550813825630cac200c53d182424fce5c2f31c70a2298a4a5798681278
 size 22624970
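
The adapter_model.bin entries above are Git LFS pointer files, not the weights themselves: `oid sha256:` is the SHA-256 digest of the real binary and `size` its byte count, so only the digest changes here while the size stays at 22624970 bytes. A small sketch for checking a downloaded file against the new pointer; the digest and size are copied from this diff, and the local filename is assumed to match the repo's adapter_model.bin.

```python
# Verify a downloaded adapter_model.bin against the new LFS pointer above.
import hashlib
import os

EXPECTED_OID = "7edb84550813825630cac200c53d182424fce5c2f31c70a2298a4a5798681278"
EXPECTED_SIZE = 22624970  # bytes, from the pointer's "size" line

path = "adapter_model.bin"
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```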