diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..838f537b1852369003d9ae824f4cda1aa2172ed1 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f874f4ba3edc90052241c7717501a02655e2297360d14d56b72da1cb37c3ff50 +size 134433530 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9debc8b1c0a60cf17ccdefc443a40ec21f256152 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f83980e6918d10a8b78990790050b48770758bad20f2137d55b21afba343f8ba +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..318a48d667b3ecd4666dc692587f103642e3669d --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a1553fa3c9fa4158c1d95c29f303ea1c82da9959617b77d6296624e38c6ca27 +size 1064 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5701a68307ba3b08f37ff0ebd11df32644144a76 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,121 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.16326530612245, + "eval_steps": 20, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.5467399903903744e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-120/README.md b/checkpoint-120/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-120/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-120/adapter_config.json b/checkpoint-120/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-120/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-120/adapter_model.safetensors b/checkpoint-120/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-120/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-120/optimizer.pt b/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2eb463a4ffe919990515e9e441aa46e86fb4dab6 --- /dev/null +++ b/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb63a7e3bd7b955d6919ac92ce44c227ae1fd0f5f284eb71d353294d5e45da6c +size 134433530 diff --git a/checkpoint-120/rng_state.pth b/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..007105d08d7e2795df2fe904950b5e4bbc64ee53 --- /dev/null +++ b/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2f836e8affa585842a356aad502d328cdc3e50fea8fcc717e527e492c41fee1 +size 14244 diff --git a/checkpoint-120/scheduler.pt b/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fa93f4b997f90de52bbb57f00f5b82cee75381a --- /dev/null +++ b/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:915b81b2641ff3c1c29a91260eaebd9d13138742f1687cfcdc8c68bcb9ea698e +size 1064 diff --git a/checkpoint-120/trainer_state.json b/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..088299ca80f27cb2654e155846613bbf3f4158fe --- /dev/null +++ b/checkpoint-120/trainer_state.json @@ -0,0 +1,141 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.795918367346939, + "eval_steps": 20, + "global_step": 120, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.057919443515474e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-120/training_args.bin b/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-140/README.md b/checkpoint-140/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-140/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-140/adapter_config.json b/checkpoint-140/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-140/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-140/adapter_model.safetensors b/checkpoint-140/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-140/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-140/optimizer.pt b/checkpoint-140/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..86330fa3abc544e22343f9ca7b993038a59fd3c0 --- /dev/null +++ b/checkpoint-140/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38597b5c11e9032f7857b2d4dca90788fcf80f6c01eba76d19895569d98c4028 +size 134433530 diff --git a/checkpoint-140/rng_state.pth b/checkpoint-140/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c2f0bf15688208bdaf1bc692391f09bdf93c2ed --- /dev/null +++ b/checkpoint-140/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6ff023b8ad03758e2bf7962e97407004f6fa05830ced892f656565e28339a5 +size 14244 diff --git a/checkpoint-140/scheduler.pt b/checkpoint-140/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..608ef2d9b88b439e133656d8fcce0b3141115e28 --- /dev/null +++ b/checkpoint-140/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d5f7e9b17251247c39fdd59964f3693734ddb120bd20dcb453e9efae1445bd +size 1064 diff --git a/checkpoint-140/trainer_state.json b/checkpoint-140/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e33e45435f9b7065f2127ee3413092459f2f200b --- /dev/null +++ b/checkpoint-140/trainer_state.json @@ -0,0 +1,161 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 11.428571428571429, + "eval_steps": 20, + "global_step": 140, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 3.565334239043912e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-140/training_args.bin b/checkpoint-140/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-140/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-160/README.md b/checkpoint-160/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-160/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-160/adapter_config.json b/checkpoint-160/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-160/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-160/adapter_model.safetensors b/checkpoint-160/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-160/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-160/optimizer.pt b/checkpoint-160/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..604dc5cb0aa13798e14e27ce35f0248d54ac384a --- /dev/null +++ b/checkpoint-160/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d666de4a36e9f322bf3bc17b971294f63e1062b91f96c542029442ec23e59c +size 134433530 diff --git a/checkpoint-160/rng_state.pth b/checkpoint-160/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c968088442b5a78a9d91ab8dd5864ef52923ecc --- /dev/null +++ b/checkpoint-160/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696df26525098d79bb3e3b139111645ab2642f9fe3067f1ce1eb4c0c8eead651 +size 14244 diff --git a/checkpoint-160/scheduler.pt b/checkpoint-160/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..384d44e5e6558d886996d98c0679b2de9b934a4c --- /dev/null +++ b/checkpoint-160/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bc91ca4e04e59b6aa0def614e5d23daa02dfd514357b40e48edfe0bf128e03 +size 1064 diff --git a/checkpoint-160/trainer_state.json b/checkpoint-160/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5ad79b7f48df33fa7478cb8d2d0b2681d334feb9 --- /dev/null +++ b/checkpoint-160/trainer_state.json @@ -0,0 +1,181 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 13.061224489795919, + "eval_steps": 20, + "global_step": 160, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.0727490345723494e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-160/training_args.bin b/checkpoint-160/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-160/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-180/README.md b/checkpoint-180/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-180/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-180/adapter_config.json b/checkpoint-180/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-180/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-180/adapter_model.safetensors b/checkpoint-180/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-180/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-180/optimizer.pt b/checkpoint-180/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c968bc77aeb3de4ffc0ea4fdb0b7962cadea3f2 --- /dev/null +++ b/checkpoint-180/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8197ec33e4b8ca50ad19f31fddddc89bd576aa2c9094368dd4313d6168435d5c +size 134433530 diff --git a/checkpoint-180/rng_state.pth b/checkpoint-180/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cce5f5339f521af50e9b28b25541927b24ed428e --- /dev/null +++ b/checkpoint-180/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba0b981dc12239aaf4850646865bbde8668564ad7178501587a5c65a1da4333 +size 14244 diff --git a/checkpoint-180/scheduler.pt b/checkpoint-180/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..900ec9e5bde9a5e4f8680c6956fe26c89923d01b --- /dev/null +++ b/checkpoint-180/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5dfc3bc75009efe48747cf9948a244ecefe4e6b758242f83bed075794e9a377 +size 1064 diff --git a/checkpoint-180/trainer_state.json b/checkpoint-180/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fa68dadde6b5d317944f694d803d64c40cda201d --- /dev/null +++ b/checkpoint-180/trainer_state.json @@ -0,0 +1,201 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 14.693877551020408, + "eval_steps": 20, + "global_step": 180, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 4.584742467718349e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-180/training_args.bin b/checkpoint-180/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-180/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ae753d0e5e554c5a4962a86c56334e65a034407 --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b3740dfb324e345a0eea52b52c7c4e483dda810c019fad2bf567e2b9726fff +size 134433530 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..331cedfd8456aabd2958495aea8865f47a77b355 --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f61e883dcc30853d98a024849c79c146662f7571765d5c6ec7e0f3fc38c998c0 +size 14244 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0b30d480a76e7df4b910aa4733f5d1c8ecf4338 --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc30782c80f39b95ece53e16ed533d0eb1d775796dc4d2c39691e355b142ab4 +size 1064 diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2f59ca37d2f76827d8f855666ca6a4ac23fb180 --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,41 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6326530612244898, + "eval_steps": 20, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.116881906381619e+16, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5b1a45dcacd4b560eac5a61d974722d9a7ea80f --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0a815caf219e8e3fb88151ad57420cc40453921de5c1b9ebd9d8178aee11d4 +size 134433530 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1b4ec96c7dd3b7fd0797b476bd4ccacc2ddbe98 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b8b65f27c55eb1f131c53aafc10342d98e1c8bad32a89dc5e6fb3953ebec16c +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f613c3ff4de97ff4d7e9aa2a9f8ee26805165829 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6795bda54303bd8bc8385fc9d0f4a2d7f6d29bd4f0ce908d2fb2991845ecc313 +size 1064 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7cd63dd2d1727d222520bfa908a58d99e930f63e --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,221 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 16.3265306122449, + "eval_steps": 20, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.091445030728499e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-220/README.md b/checkpoint-220/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-220/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-220/adapter_config.json b/checkpoint-220/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-220/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-220/adapter_model.safetensors b/checkpoint-220/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-220/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-220/optimizer.pt b/checkpoint-220/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ff4afe3c45a8ee4ea64afbcae9a7e9cb7e35d3e --- /dev/null +++ b/checkpoint-220/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c401cd7fbf190257a33a6941e0c27460283f06f17c8d3a391ede93532e7116 +size 134433530 diff --git a/checkpoint-220/rng_state.pth b/checkpoint-220/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a6a3b45e466708fdb65db3e0c0bc2c059d386dc --- /dev/null +++ b/checkpoint-220/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71dc0c7e032ae3a37adc21a08954ac47f0d498aab14ce8ac78d69fcb0a03a04 +size 14244 diff --git a/checkpoint-220/scheduler.pt b/checkpoint-220/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..46f9dfab9e077ea12afa1d94a7c8aff731125da3 --- /dev/null +++ b/checkpoint-220/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd18ed34ef6e0139641e61e552552fe67f5e90236dba94b6c8eca5985486bd06 +size 1064 diff --git a/checkpoint-220/trainer_state.json b/checkpoint-220/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97cc24dd34386cad16aa17ca6a31a1bc1151c93b --- /dev/null +++ b/checkpoint-220/trainer_state.json @@ -0,0 +1,241 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 17.959183673469386, + "eval_steps": 20, + "global_step": 220, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 5.602624483853599e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-220/training_args.bin b/checkpoint-220/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-220/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-240/README.md b/checkpoint-240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-240/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-240/adapter_config.json b/checkpoint-240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-240/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-240/adapter_model.safetensors b/checkpoint-240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-240/optimizer.pt b/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7186524f24409216adf56b8fa7e9c32d522af310 --- /dev/null +++ b/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3143e4bca63e012de7d74be1472aa68a9111ab213b9e19a848ec832465989a +size 134433530 diff --git a/checkpoint-240/rng_state.pth b/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1176b60c26afdf7054efc7de4b4d33b68afd8d48 --- /dev/null +++ b/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127dc7fd12cbd648a2a048e64a69de5b812b625b7bf8eb2f14ac44a34ff1105d +size 14244 diff --git a/checkpoint-240/scheduler.pt b/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd718e258bc8056a6e0a813b8d692817c2f44439 --- /dev/null +++ b/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35029adf67ec7b6288431b13620690917e0d68e95151a8d732f3eb783ef0c816 +size 1064 diff --git a/checkpoint-240/trainer_state.json b/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..906de39f1c6998a6a98ba0da6e6923a050cc18a5 --- /dev/null +++ b/checkpoint-240/trainer_state.json @@ -0,0 +1,261 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.591836734693878, + "eval_steps": 20, + "global_step": 240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.110039279382036e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-240/training_args.bin b/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-260/README.md b/checkpoint-260/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-260/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-260/adapter_config.json b/checkpoint-260/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-260/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-260/adapter_model.safetensors b/checkpoint-260/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-260/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-260/optimizer.pt b/checkpoint-260/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..75ba13df48e02a2b7fb41457c99b02f564065b25 --- /dev/null +++ b/checkpoint-260/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41467f7891a68e0e1f3254c76d1e77f81d058334bd9404ce5dff61130bf35d77 +size 134433530 diff --git a/checkpoint-260/rng_state.pth b/checkpoint-260/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5e8aee49445d0e294f9e5950b6f9958a7de3057 --- /dev/null +++ b/checkpoint-260/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0766b5e7464d6fad499891fc2cc4274e4d4aff10092f28a1b456b5653ac55a +size 14244 diff --git a/checkpoint-260/scheduler.pt b/checkpoint-260/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7dc1cbf308c73ec7f2c8ba6761bfa8c3cb511190 --- /dev/null +++ b/checkpoint-260/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618426a7b0745c9d6476d3c7a67a48976de53907877b0f786d6ae0b3e2e942da +size 1064 diff --git a/checkpoint-260/trainer_state.json b/checkpoint-260/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..58b087269f7519287e00b5892c4d5a536c04e71e --- /dev/null +++ b/checkpoint-260/trainer_state.json @@ -0,0 +1,281 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 21.224489795918366, + "eval_steps": 20, + "global_step": 260, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 6.618268054931374e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-260/training_args.bin b/checkpoint-260/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-260/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-280/README.md b/checkpoint-280/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-280/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-280/adapter_config.json b/checkpoint-280/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-280/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-280/adapter_model.safetensors b/checkpoint-280/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-280/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-280/optimizer.pt b/checkpoint-280/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5f413318ddb9114138d6ee70d65cea0afe5392d --- /dev/null +++ b/checkpoint-280/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbcaed6ac019f9c5fe8ea816bb2d58a7ae3a9f635bccc8d582ee96ed56178c6 +size 134433530 diff --git a/checkpoint-280/rng_state.pth b/checkpoint-280/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b53e9a85677829c8fb3d6c76d6b8ba3a21a7c8c1 --- /dev/null +++ b/checkpoint-280/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b979071679b4d774fea367b94a5cfb2c16f17d8c10ebd6239190cdfd4cb8413c +size 14244 diff --git a/checkpoint-280/scheduler.pt b/checkpoint-280/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2eea9aa1d6851803e5b4cf6bad706f10867b92a --- /dev/null +++ b/checkpoint-280/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:326b29a713a65b60d1cdb16e40a0ac0c9e0bc23145aea4ec7d4e2e525de2f511 +size 1064 diff --git a/checkpoint-280/trainer_state.json b/checkpoint-280/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..af838e345cf1677c896ccc3bb6a1c3bf8168eec8 --- /dev/null +++ b/checkpoint-280/trainer_state.json @@ -0,0 +1,301 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 22.857142857142858, + "eval_steps": 20, + "global_step": 280, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.129651003061699e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-280/training_args.bin b/checkpoint-280/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-280/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ffc9a984fbcf9cac896786fc27999347d2919db --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3cf3df4f359cb3582956ecefdcf24500952731daf6369b1cbda41f4b52c567 +size 134433530 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..98a63567d42d546a4d446993b4ab41cfa436e022 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75810a18f0cc86303a306692d0e54a64c603b93c97c59badd3d4426195c2f55 +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..043065694100117ec9ec34d87d4119760c55d60e --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a846f1b53d06b3a5385e00f3c382802f5449c10c5faa9bc984677d196316c2 +size 1064 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..edb29b60b1d4c06e73b0745ccae0541edadf85e9 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,321 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 24.489795918367346, + "eval_steps": 20, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 7.636760556082299e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-320/README.md b/checkpoint-320/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-320/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-320/adapter_config.json b/checkpoint-320/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-320/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-320/adapter_model.safetensors b/checkpoint-320/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-320/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-320/optimizer.pt b/checkpoint-320/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fb2b53bb63b0b5248a36f62527372bc1502c38a --- /dev/null +++ b/checkpoint-320/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45637047daf1e7482cf2d3ba2e078f6219bb94ac03523bc5b485e85a8a76702b +size 134433530 diff --git a/checkpoint-320/rng_state.pth b/checkpoint-320/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cee109f28c137ec30cc107f30902d61ef4c23d61 --- /dev/null +++ b/checkpoint-320/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d355e628c51a01ae3545bb9fd46d3c042e514f7453496c1bac8ada148bf0797 +size 14244 diff --git a/checkpoint-320/scheduler.pt b/checkpoint-320/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..44a7c96557d47c43ee760fe5e9084957e7d47bbb --- /dev/null +++ b/checkpoint-320/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6f90122c7166d476e722a6703ab94e5de6361f1d1a61a844fb6fd64429c85b +size 1064 diff --git a/checkpoint-320/trainer_state.json b/checkpoint-320/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f33ab48a10ea4325ed642ec16cf9c4943ac4b31e --- /dev/null +++ b/checkpoint-320/trainer_state.json @@ -0,0 +1,341 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 26.122448979591837, + "eval_steps": 20, + "global_step": 320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + }, + { + "epoch": 25.31, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0134, + "step": 310 + }, + { + "epoch": 26.12, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0133, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": 0.33249062299728394, + "eval_runtime": 30.6095, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 320 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.145091079134249e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-320/training_args.bin b/checkpoint-320/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-320/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-340/README.md b/checkpoint-340/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-340/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-340/adapter_config.json b/checkpoint-340/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-340/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-340/adapter_model.safetensors b/checkpoint-340/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-340/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-340/optimizer.pt b/checkpoint-340/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..60dac1f3eb4f614b901f88887a952bdb9ac98d9e --- /dev/null +++ b/checkpoint-340/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ffa13052be6167203d5489761109b8f562c7f50f0a59844970ef9eb32f5fa8e +size 134433530 diff --git a/checkpoint-340/rng_state.pth b/checkpoint-340/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..115bd5d89de3699c39949d910166326d09336419 --- /dev/null +++ b/checkpoint-340/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff591c203cff545ff957b8dd0a1a8f2f4a34769c838390ad53c0a683323ff1a +size 14244 diff --git a/checkpoint-340/scheduler.pt b/checkpoint-340/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce1f7af7d3957bf85581fa17216b0d13cafadfac --- /dev/null +++ b/checkpoint-340/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03376fa8bdbd1f35e302b4f6e9042b2c5f57834becd5c6d5f467088976873ddd +size 1064 diff --git a/checkpoint-340/trainer_state.json b/checkpoint-340/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0c63c3f397ede0b8b714da0bb12ce2ce27a80eeb --- /dev/null +++ b/checkpoint-340/trainer_state.json @@ -0,0 +1,361 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 27.755102040816325, + "eval_steps": 20, + "global_step": 340, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + }, + { + "epoch": 25.31, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0134, + "step": 310 + }, + { + "epoch": 26.12, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0133, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": 0.33249062299728394, + "eval_runtime": 30.6095, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 320 + }, + { + "epoch": 26.94, + "learning_rate": 7e-05, + "loss": 0.0131, + "step": 330 + }, + { + "epoch": 27.76, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0131, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": 0.3382267951965332, + "eval_runtime": 30.6161, + "eval_samples_per_second": 12.64, + "eval_steps_per_second": 1.6, + "step": 340 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 8.656372279761961e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-340/training_args.bin b/checkpoint-340/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-340/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-360/README.md b/checkpoint-360/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-360/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-360/adapter_config.json b/checkpoint-360/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-360/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-360/adapter_model.safetensors b/checkpoint-360/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-360/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-360/optimizer.pt b/checkpoint-360/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2658d68d2c9bb90e69a9be38d860e5404c51bbc --- /dev/null +++ b/checkpoint-360/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42433c2ad3b767f694a3d52fde3e75a3a3067925ba323c30660e415ed8f70c07 +size 134433530 diff --git a/checkpoint-360/rng_state.pth b/checkpoint-360/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c542bff9c66db1a37f21389c94ad802cfb86cf1c --- /dev/null +++ b/checkpoint-360/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9970a6d619d072a6b191c23ade92fc70e0889209ff12033fbef950f20c7c7f +size 14244 diff --git a/checkpoint-360/scheduler.pt b/checkpoint-360/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f81351e650ea7c1de881065d6d8cb01c4bf1781 --- /dev/null +++ b/checkpoint-360/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35452c1466d786926545eee1f0187be30d79bc322f32d2395b7460c9b050bbbe +size 1064 diff --git a/checkpoint-360/trainer_state.json b/checkpoint-360/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..08dbb1c47d5150ca2735a05b6ce955f982a19936 --- /dev/null +++ b/checkpoint-360/trainer_state.json @@ -0,0 +1,381 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 29.387755102040817, + "eval_steps": 20, + "global_step": 360, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + }, + { + "epoch": 25.31, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0134, + "step": 310 + }, + { + "epoch": 26.12, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0133, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": 0.33249062299728394, + "eval_runtime": 30.6095, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 320 + }, + { + "epoch": 26.94, + "learning_rate": 7e-05, + "loss": 0.0131, + "step": 330 + }, + { + "epoch": 27.76, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0131, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": 0.3382267951965332, + "eval_runtime": 30.6161, + "eval_samples_per_second": 12.64, + "eval_steps_per_second": 1.6, + "step": 340 + }, + { + "epoch": 28.57, + "learning_rate": 4.9999999999999996e-05, + "loss": 0.0128, + "step": 350 + }, + { + "epoch": 29.39, + "learning_rate": 3.9999999999999996e-05, + "loss": 0.0127, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": 0.3383706212043762, + "eval_runtime": 30.6092, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 360 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.164499307808686e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-360/training_args.bin b/checkpoint-360/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-360/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-380/README.md b/checkpoint-380/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-380/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-380/adapter_config.json b/checkpoint-380/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-380/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-380/adapter_model.safetensors b/checkpoint-380/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-380/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-380/optimizer.pt b/checkpoint-380/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..215b87358d301ec386af24b6d8c0b7846b4c368d --- /dev/null +++ b/checkpoint-380/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d0f2212a67ccd14e923eb9f7ea7c84433ada16bc03042aa93ab7265b6efa0d +size 134433530 diff --git a/checkpoint-380/rng_state.pth b/checkpoint-380/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f8ede8d02fbacb2342bd4cc9962bc82f8c3796ec --- /dev/null +++ b/checkpoint-380/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69fd21e8439091aa27da485823c5b481e539f212a19479a1e2135a18b2132f4d +size 14244 diff --git a/checkpoint-380/scheduler.pt b/checkpoint-380/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0e2655e310c92954c1a1e203e6229349da342e9 --- /dev/null +++ b/checkpoint-380/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a39e2df26b3f3572b475720558ff8a2e0c169132c8cb9a6d4c54b36340352b7 +size 1064 diff --git a/checkpoint-380/trainer_state.json b/checkpoint-380/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0c8c16e1e9dc09717170946f7dd8e47c8849d6af --- /dev/null +++ b/checkpoint-380/trainer_state.json @@ -0,0 +1,401 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 31.020408163265305, + "eval_steps": 20, + "global_step": 380, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + }, + { + "epoch": 25.31, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0134, + "step": 310 + }, + { + "epoch": 26.12, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0133, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": 0.33249062299728394, + "eval_runtime": 30.6095, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 320 + }, + { + "epoch": 26.94, + "learning_rate": 7e-05, + "loss": 0.0131, + "step": 330 + }, + { + "epoch": 27.76, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0131, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": 0.3382267951965332, + "eval_runtime": 30.6161, + "eval_samples_per_second": 12.64, + "eval_steps_per_second": 1.6, + "step": 340 + }, + { + "epoch": 28.57, + "learning_rate": 4.9999999999999996e-05, + "loss": 0.0128, + "step": 350 + }, + { + "epoch": 29.39, + "learning_rate": 3.9999999999999996e-05, + "loss": 0.0127, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": 0.3383706212043762, + "eval_runtime": 30.6092, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 360 + }, + { + "epoch": 30.2, + "learning_rate": 2.9999999999999997e-05, + "loss": 0.0126, + "step": 370 + }, + { + "epoch": 31.02, + "learning_rate": 1.9999999999999998e-05, + "loss": 0.0126, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": 0.344478040933609, + "eval_runtime": 30.6226, + "eval_samples_per_second": 12.638, + "eval_steps_per_second": 1.6, + "step": 380 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 9.671507113326674e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-380/training_args.bin b/checkpoint-380/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-380/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-40/README.md b/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-40/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-40/adapter_config.json b/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-40/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-40/adapter_model.safetensors b/checkpoint-40/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-40/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-40/optimizer.pt b/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01a3d8d51ce4f916c79447d29100da1335d2dcac --- /dev/null +++ b/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4acdecf6b8eed3f7941d86157ddffc855d6f053c1bdf15e4ba2e0777b3ac843b +size 134433530 diff --git a/checkpoint-40/rng_state.pth b/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..793ac908919ee253d772b98a9fc601d02ee029e3 --- /dev/null +++ b/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19d778ab5f3774f61bde0ab2f560b8f5e289959eca4c1133c7e0e2a88e15222 +size 14244 diff --git a/checkpoint-40/scheduler.pt b/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d1c414264d7a61f0eab2a577c1b2d29d2015e3c --- /dev/null +++ b/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c8a6a21ca956c9d84f17aca6da97a5c2b2ba1ba5d1e4f431e18bc47f94ca32 +size 1064 diff --git a/checkpoint-40/trainer_state.json b/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5993f91eba64e95f3f5cb8438e1f22fb0c7efae2 --- /dev/null +++ b/checkpoint-40/trainer_state.json @@ -0,0 +1,61 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.2653061224489797, + "eval_steps": 20, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.0190012386639872e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40/training_args.bin b/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.safetensors b/checkpoint-400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0cd4663fba387d2de0f23e14899bb7b386213f6 --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b3c4eadbe61a7d85b65eab157457387e3b96e01f3cb356134dfe57c3884130 +size 134433530 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..54167400fd638ff51579bd63bd6a76a254f553e4 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb5b838f3fb19e53c06050101b9c18f09f64255d2663c2c7b8fa4136614074d +size 14244 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..62c7d6ea1655a58e9a5460c3034b46e354a34af4 --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee665d99b8d4ac37b6829a57abd01a01763b04846f27bc645d525d70173d6821 +size 1064 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ed22547f53e6a5bbd08c4cefbb93f39b945ca8 --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,421 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 32.6530612244898, + "eval_steps": 20, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + }, + { + "epoch": 7.35, + "learning_rate": 0.00027, + "loss": 0.2464, + "step": 90 + }, + { + "epoch": 8.16, + "learning_rate": 0.0003, + "loss": 0.2276, + "step": 100 + }, + { + "epoch": 8.16, + "eval_loss": 0.22304466366767883, + "eval_runtime": 30.6362, + "eval_samples_per_second": 12.632, + "eval_steps_per_second": 1.599, + "step": 100 + }, + { + "epoch": 8.98, + "learning_rate": 0.00029, + "loss": 0.1993, + "step": 110 + }, + { + "epoch": 9.8, + "learning_rate": 0.00028, + "loss": 0.1747, + "step": 120 + }, + { + "epoch": 9.8, + "eval_loss": 0.20949400961399078, + "eval_runtime": 30.5837, + "eval_samples_per_second": 12.654, + "eval_steps_per_second": 1.602, + "step": 120 + }, + { + "epoch": 10.61, + "learning_rate": 0.00027, + "loss": 0.1408, + "step": 130 + }, + { + "epoch": 11.43, + "learning_rate": 0.00026, + "loss": 0.1131, + "step": 140 + }, + { + "epoch": 11.43, + "eval_loss": 0.20713694393634796, + "eval_runtime": 30.5887, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.602, + "step": 140 + }, + { + "epoch": 12.24, + "learning_rate": 0.00025, + "loss": 0.0839, + "step": 150 + }, + { + "epoch": 13.06, + "learning_rate": 0.00023999999999999998, + "loss": 0.0644, + "step": 160 + }, + { + "epoch": 13.06, + "eval_loss": 0.2342994511127472, + "eval_runtime": 30.5213, + "eval_samples_per_second": 12.68, + "eval_steps_per_second": 1.605, + "step": 160 + }, + { + "epoch": 13.88, + "learning_rate": 0.00023, + "loss": 0.0466, + "step": 170 + }, + { + "epoch": 14.69, + "learning_rate": 0.00021999999999999995, + "loss": 0.0365, + "step": 180 + }, + { + "epoch": 14.69, + "eval_loss": 0.263688862323761, + "eval_runtime": 30.5969, + "eval_samples_per_second": 12.648, + "eval_steps_per_second": 1.601, + "step": 180 + }, + { + "epoch": 15.51, + "learning_rate": 0.00020999999999999998, + "loss": 0.029, + "step": 190 + }, + { + "epoch": 16.33, + "learning_rate": 0.00019999999999999998, + "loss": 0.0244, + "step": 200 + }, + { + "epoch": 16.33, + "eval_loss": 0.28023940324783325, + "eval_runtime": 30.5334, + "eval_samples_per_second": 12.675, + "eval_steps_per_second": 1.605, + "step": 200 + }, + { + "epoch": 17.14, + "learning_rate": 0.00018999999999999998, + "loss": 0.0208, + "step": 210 + }, + { + "epoch": 17.96, + "learning_rate": 0.00017999999999999998, + "loss": 0.0186, + "step": 220 + }, + { + "epoch": 17.96, + "eval_loss": 0.30082279443740845, + "eval_runtime": 30.6266, + "eval_samples_per_second": 12.636, + "eval_steps_per_second": 1.6, + "step": 220 + }, + { + "epoch": 18.78, + "learning_rate": 0.00016999999999999999, + "loss": 0.0174, + "step": 230 + }, + { + "epoch": 19.59, + "learning_rate": 0.00015999999999999999, + "loss": 0.0161, + "step": 240 + }, + { + "epoch": 19.59, + "eval_loss": 0.3072655200958252, + "eval_runtime": 30.5563, + "eval_samples_per_second": 12.665, + "eval_steps_per_second": 1.604, + "step": 240 + }, + { + "epoch": 20.41, + "learning_rate": 0.00015, + "loss": 0.0153, + "step": 250 + }, + { + "epoch": 21.22, + "learning_rate": 0.00014, + "loss": 0.0147, + "step": 260 + }, + { + "epoch": 21.22, + "eval_loss": 0.32090628147125244, + "eval_runtime": 30.6058, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 22.04, + "learning_rate": 0.00013, + "loss": 0.0145, + "step": 270 + }, + { + "epoch": 22.86, + "learning_rate": 0.00011999999999999999, + "loss": 0.0139, + "step": 280 + }, + { + "epoch": 22.86, + "eval_loss": 0.32418766617774963, + "eval_runtime": 30.6034, + "eval_samples_per_second": 12.646, + "eval_steps_per_second": 1.601, + "step": 280 + }, + { + "epoch": 23.67, + "learning_rate": 0.00010999999999999998, + "loss": 0.0138, + "step": 290 + }, + { + "epoch": 24.49, + "learning_rate": 9.999999999999999e-05, + "loss": 0.0135, + "step": 300 + }, + { + "epoch": 24.49, + "eval_loss": 0.33807095885276794, + "eval_runtime": 30.7038, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 1.596, + "step": 300 + }, + { + "epoch": 25.31, + "learning_rate": 8.999999999999999e-05, + "loss": 0.0134, + "step": 310 + }, + { + "epoch": 26.12, + "learning_rate": 7.999999999999999e-05, + "loss": 0.0133, + "step": 320 + }, + { + "epoch": 26.12, + "eval_loss": 0.33249062299728394, + "eval_runtime": 30.6095, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 320 + }, + { + "epoch": 26.94, + "learning_rate": 7e-05, + "loss": 0.0131, + "step": 330 + }, + { + "epoch": 27.76, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.0131, + "step": 340 + }, + { + "epoch": 27.76, + "eval_loss": 0.3382267951965332, + "eval_runtime": 30.6161, + "eval_samples_per_second": 12.64, + "eval_steps_per_second": 1.6, + "step": 340 + }, + { + "epoch": 28.57, + "learning_rate": 4.9999999999999996e-05, + "loss": 0.0128, + "step": 350 + }, + { + "epoch": 29.39, + "learning_rate": 3.9999999999999996e-05, + "loss": 0.0127, + "step": 360 + }, + { + "epoch": 29.39, + "eval_loss": 0.3383706212043762, + "eval_runtime": 30.6092, + "eval_samples_per_second": 12.643, + "eval_steps_per_second": 1.601, + "step": 360 + }, + { + "epoch": 30.2, + "learning_rate": 2.9999999999999997e-05, + "loss": 0.0126, + "step": 370 + }, + { + "epoch": 31.02, + "learning_rate": 1.9999999999999998e-05, + "loss": 0.0126, + "step": 380 + }, + { + "epoch": 31.02, + "eval_loss": 0.344478040933609, + "eval_runtime": 30.6226, + "eval_samples_per_second": 12.638, + "eval_steps_per_second": 1.6, + "step": 380 + }, + { + "epoch": 31.84, + "learning_rate": 9.999999999999999e-06, + "loss": 0.0124, + "step": 390 + }, + { + "epoch": 32.65, + "learning_rate": 0.0, + "loss": 0.0124, + "step": 400 + }, + { + "epoch": 32.65, + "eval_loss": 0.34107527136802673, + "eval_runtime": 30.6055, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 1.601, + "step": 400 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.0183500546472673e+18, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-60/README.md b/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-60/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-60/adapter_config.json b/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-60/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-60/adapter_model.safetensors b/checkpoint-60/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-60/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-60/optimizer.pt b/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..50b05021a3b749f841039a29972d04360fed67e7 --- /dev/null +++ b/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:958f88114adb65c9c19c715af6e5875bfcaf089b9f6696ddd679c10d57c7fcf6 +size 134433530 diff --git a/checkpoint-60/rng_state.pth b/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7322bd8e79a2c8f4a9527b27eb9a42451f771df3 --- /dev/null +++ b/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd15f4ecfa03d238567de15a74c824ebdb43597f21a1efa5619dc326567c207 +size 14244 diff --git a/checkpoint-60/scheduler.pt b/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccc7f4b428abbeedfc6798ac0a82ddf117ba2dfc --- /dev/null +++ b/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d0406405dc2799f6a205a30a40ceac73b9e2fdb57b3e7109b27235b06006ef +size 1064 diff --git a/checkpoint-60/trainer_state.json b/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..50c485b47137541cc1f712b5705d4962f0a40333 --- /dev/null +++ b/checkpoint-60/trainer_state.json @@ -0,0 +1,81 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8979591836734695, + "eval_steps": 20, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 1.530485934296924e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60/training_args.bin b/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/checkpoint-80/README.md b/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8d576225237c31388675b8b3bfa0fb4ebbdd7bce --- /dev/null +++ b/checkpoint-80/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: codellama/CodeLlama-7b-hf +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-80/adapter_config.json b/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c61867a61ecc9c9fd8404d4a960ef8dc7fc2f5b --- /dev/null +++ b/checkpoint-80/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "codellama/CodeLlama-7b-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-80/adapter_model.safetensors b/checkpoint-80/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b7377ced059703efbf7179779a269234116eb75 --- /dev/null +++ b/checkpoint-80/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41 +size 48 diff --git a/checkpoint-80/optimizer.pt b/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7eb79342cbbb1dfffa074854ed001f7c081eb223 --- /dev/null +++ b/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c29cd4bc1d552109d498fb280cf34fd473f20bac45658dc55ed574e21d5576 +size 134433530 diff --git a/checkpoint-80/rng_state.pth b/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85d2931c8946edaae4e58fa7b47dcd8db2bb779c --- /dev/null +++ b/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2734667f2060f7dc104d2320d15836b6f8ca316b284ba4f1ea3b2df7d2c5849c +size 14244 diff --git a/checkpoint-80/scheduler.pt b/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee345e8a659f9d4e86e79b79c6e415ea95b6fa42 --- /dev/null +++ b/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d82a70de51a4166c824e4076d761aaf8a8967df5c1cd7fdce99da5c3b5bc50 +size 1064 diff --git a/checkpoint-80/trainer_state.json b/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bc35705ff6035f105aed5fe92b2e5b3b773a15b7 --- /dev/null +++ b/checkpoint-80/trainer_state.json @@ -0,0 +1,101 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.530612244897959, + "eval_steps": 20, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.82, + "learning_rate": 2.9999999999999997e-05, + "loss": 1.7509, + "step": 10 + }, + { + "epoch": 1.63, + "learning_rate": 5.9999999999999995e-05, + "loss": 1.6967, + "step": 20 + }, + { + "epoch": 1.63, + "eval_loss": 1.6115258932113647, + "eval_runtime": 30.6688, + "eval_samples_per_second": 12.619, + "eval_steps_per_second": 1.598, + "step": 20 + }, + { + "epoch": 2.45, + "learning_rate": 8.999999999999999e-05, + "loss": 1.5138, + "step": 30 + }, + { + "epoch": 3.27, + "learning_rate": 0.00011999999999999999, + "loss": 1.0417, + "step": 40 + }, + { + "epoch": 3.27, + "eval_loss": 0.5990618467330933, + "eval_runtime": 45.282, + "eval_samples_per_second": 8.546, + "eval_steps_per_second": 1.082, + "step": 40 + }, + { + "epoch": 4.08, + "learning_rate": 0.00015, + "loss": 0.4159, + "step": 50 + }, + { + "epoch": 4.9, + "learning_rate": 0.00017999999999999998, + "loss": 0.3112, + "step": 60 + }, + { + "epoch": 4.9, + "eval_loss": 0.277026891708374, + "eval_runtime": 30.7302, + "eval_samples_per_second": 12.593, + "eval_steps_per_second": 1.595, + "step": 60 + }, + { + "epoch": 5.71, + "learning_rate": 0.00020999999999999998, + "loss": 0.285, + "step": 70 + }, + { + "epoch": 6.53, + "learning_rate": 0.00023999999999999998, + "loss": 0.27, + "step": 80 + }, + { + "epoch": 6.53, + "eval_loss": 0.24872899055480957, + "eval_runtime": 30.6498, + "eval_samples_per_second": 12.627, + "eval_steps_per_second": 1.599, + "step": 80 + } + ], + "logging_steps": 10, + "max_steps": 400, + "num_input_tokens_seen": 0, + "num_train_epochs": 34, + "save_steps": 20, + "total_flos": 2.0387147098462618e+17, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80/training_args.bin b/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8845a8a8e5c06aad176bec0bbc97b40e881615fa --- /dev/null +++ b/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8d1856a7e88d5d206461b4fada83bde140b82a5a28df09559656cee017c5bce +size 4664 diff --git a/runs/Feb21_04-12-12_nlembhrwvg/events.out.tfevents.1708488736.nlembhrwvg.993.0 b/runs/Feb21_04-12-12_nlembhrwvg/events.out.tfevents.1708488736.nlembhrwvg.993.0 new file mode 100644 index 0000000000000000000000000000000000000000..bce17ae1dda8583a9da51b344921b8b6570781f0 --- /dev/null +++ b/runs/Feb21_04-12-12_nlembhrwvg/events.out.tfevents.1708488736.nlembhrwvg.993.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee073666ab0d3ecd20811dba55e6c0974311b0c8dbf1fd1566e9ff089c41b6d +size 16734