mtasic85 committed on
Commit
94081ee
1 Parent(s): 727041c

train model

Browse files
scripts/model.yaml CHANGED
@@ -44,10 +44,11 @@ resume: "auto"
44
  # Data-related arguments. If not provided, the default is ``litgpt.data.TinyLlama``.
45
  data:
46
  class_path: LitData
 
47
  init_args:
48
  data_path: "../data/"
49
- # num_workers: 16
50
- num_workers: 3
51
 
52
  # Training-related arguments. See ``litgpt.args.TrainArgs`` for details
53
  train:
@@ -61,8 +62,8 @@ train:
61
  global_batch_size: 512
62
 
63
  # Number of samples per data-parallel rank (type: int, default: 4)
64
- # micro_batch_size: 16
65
- micro_batch_size: 14
66
 
67
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
68
  lr_warmup_steps: 2000
@@ -108,9 +109,9 @@ eval:
108
 
109
  # Optimizer-related arguments
110
  optimizer:
111
- class_path: torch.optim.AdamW
112
  # class_path: bitsandbytes.optim.PagedAdamW
113
- # class_path: bitsandbytes.optim.AdamW8bit
114
  # class_path: bitsandbytes.optim.PagedAdamW8bit
115
 
116
  init_args:
 
44
  # Data-related arguments. If not provided, the default is ``litgpt.data.TinyLlama``.
45
  data:
46
  class_path: LitData
47
+
48
  init_args:
49
  data_path: "../data/"
50
+ num_workers: 16
51
+ # num_workers: 3
52
 
53
  # Training-related arguments. See ``litgpt.args.TrainArgs`` for details
54
  train:
 
62
  global_batch_size: 512
63
 
64
  # Number of samples per data-parallel rank (type: int, default: 4)
65
+ micro_batch_size: 16
66
+ # micro_batch_size: 14
67
 
68
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
69
  lr_warmup_steps: 2000
 
109
 
110
  # Optimizer-related arguments
111
  optimizer:
112
+ # class_path: torch.optim.AdamW
113
  # class_path: bitsandbytes.optim.PagedAdamW
114
+ class_path: bitsandbytes.optim.AdamW8bit
115
  # class_path: bitsandbytes.optim.PagedAdamW8bit
116
 
117
  init_args:
scripts/requirements-lit.in CHANGED
@@ -5,6 +5,7 @@ jinja2
5
  transformers
6
  bitsandbytes
7
  wandb
8
- litgpt[all]
 
9
  litdata
10
  grokadamw
 
5
  transformers
6
  bitsandbytes
7
  wandb
8
+ # litgpt[all]
9
+ litgpt[all] @ git+https://github.com/mtasic85/litgpt.git
10
  litdata
11
  grokadamw