manojpreveen committed
Commit 3bec671
1 Parent(s): b303706

Update README.md

Files changed (1)
  1. README.md +38 -2
README.md CHANGED
@@ -3,6 +3,42 @@ license: mit
 datasets:
 - manojpreveen/Orca
 ---
-Data : 100k from 1M Gpt-4 Orca data
 
-Learning rate : 1e-5
+**Base model :** meta-llama/Llama-2-70b-hf
+
+**Data :** 100k from 1M Gpt-4 Orca data
+
+**Training Params :**
+batch_size_training: '16'
+checkpoint_type: StateDictType.FULL_STATE_DICT
+dataset: orca_dolphin_100k_gpt4
+dist_checkpoint_folder: fine-tuned
+dist_checkpoint_root_folder: model_checkpoints
+enable_fsdp: 'True'
+freeze_layers: 'False'
+fsdp_activation_checkpointing: 'True'
+gamma: '0.85'
+low_cpu_fsdp: 'True'
+lr: 1e-05
+micro_batch_size: '16'
+mixed_precision: 'True'
+model_name: meta-llama/Llama-2-70b-chat-hf
+num_epochs: '1'
+num_freeze_layers: '1'
+num_workers_dataloader: '1'
+one_gpu: 'False'
+optimizer: anyprecision
+output_dir: ~/llama-recipes-70b/output
+peft_method: lora
+pure_bf16: 'True'
+quantization: 'False'
+run_validation: 'True'
+save_model: 'True'
+save_optimizer: 'True'
+seed: '42'
+sharding_strategy: ShardingStrategy.FULL_SHARD
+use_fast_kernels: (False,)
+use_fp16: 'False'
+use_peft: 'False'
+val_batch_size: '16'
+weight_decay: '0.0'
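
For reference, the parameter names above match the `train_config` fields of Meta's llama-recipes FSDP fine-tuning entry point, which are usually passed as CLI overrides under `torchrun`. Below is a minimal sketch of such a launch; the script path, the single-node 8-GPU layout, and the exact flag selection are assumptions for illustration, not part of this commit:

```bash
# Hypothetical launch reconstructed from the dumped config above.
# The 8-GPU layout and examples/finetuning.py path are assumptions.
torchrun --nnodes 1 --nproc_per_node 8 examples/finetuning.py \
  --enable_fsdp \
  --low_cpu_fsdp \
  --pure_bf16 \
  --model_name meta-llama/Llama-2-70b-chat-hf \
  --dataset orca_dolphin_100k_gpt4 \
  --batch_size_training 16 \
  --lr 1e-5 \
  --num_epochs 1 \
  --optimizer anyprecision \
  --dist_checkpoint_root_folder model_checkpoints \
  --dist_checkpoint_folder fine-tuned \
  --output_dir ~/llama-recipes-70b/output
```

Two details worth noting from the dump: `pure_bf16` with the `anyprecision` optimizer keeps weights and optimizer state in bfloat16, which together with `ShardingStrategy.FULL_SHARD` is what makes a full 70B fine-tune fit across the shards; and since `use_peft` is `'False'`, the `peft_method: lora` entry is inert, so this describes a full fine-tune rather than a LoRA run.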