train model
Browse files
- scripts/model.yaml +7 -6
- scripts/requirements-lit.in +2 -1
scripts/model.yaml
CHANGED
@@ -44,10 +44,11 @@ resume: "auto"
|
|
44 |
# Data-related arguments. If not provided, the default is ``litgpt.data.TinyLlama``.
|
45 |
data:
|
46 |
class_path: LitData
|
|
|
47 |
init_args:
|
48 |
data_path: "../data/"
|
49 |
-
|
50 |
-
num_workers: 3
|
51 |
|
52 |
# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
|
53 |
train:
|
@@ -61,8 +62,8 @@ train:
|
|
61 |
global_batch_size: 512
|
62 |
|
63 |
# Number of samples per data-parallel rank (type: int, default: 4)
|
64 |
-
|
65 |
-
micro_batch_size: 14
|
66 |
|
67 |
# Number of iterations with learning rate warmup active (type: int, default: 2000)
|
68 |
lr_warmup_steps: 2000
|
@@ -108,9 +109,9 @@ eval:
|
|
108 |
|
109 |
# Optimizer-related arguments
|
110 |
optimizer:
|
111 |
-
class_path: torch.optim.AdamW
|
112 |
# class_path: bitsandbytes.optim.PagedAdamW
|
113 |
-
|
114 |
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
115 |
|
116 |
init_args:
|
|
|
44 |
# Data-related arguments. If not provided, the default is ``litgpt.data.TinyLlama``.
|
45 |
data:
|
46 |
class_path: LitData
|
47 |
+
|
48 |
init_args:
|
49 |
data_path: "../data/"
|
50 |
+
num_workers: 16
|
51 |
+
# num_workers: 3
|
52 |
|
53 |
# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
|
54 |
train:
|
|
|
62 |
global_batch_size: 512
|
63 |
|
64 |
# Number of samples per data-parallel rank (type: int, default: 4)
|
65 |
+
micro_batch_size: 16
|
66 |
+
# micro_batch_size: 14
|
67 |
|
68 |
# Number of iterations with learning rate warmup active (type: int, default: 2000)
|
69 |
lr_warmup_steps: 2000
|
|
|
109 |
|
110 |
# Optimizer-related arguments
|
111 |
optimizer:
|
112 |
+
# class_path: torch.optim.AdamW
|
113 |
# class_path: bitsandbytes.optim.PagedAdamW
|
114 |
+
class_path: bitsandbytes.optim.AdamW8bit
|
115 |
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
116 |
|
117 |
init_args:
|
scripts/requirements-lit.in
CHANGED
@@ -5,6 +5,7 @@ jinja2
|
|
5 |
transformers
|
6 |
bitsandbytes
|
7 |
wandb
|
8 |
-
litgpt[all]
|
|
|
9 |
litdata
|
10 |
grokadamw
|
|
|
5 |
transformers
|
6 |
bitsandbytes
|
7 |
wandb
|
8 |
+
# litgpt[all]
|
9 |
+
litgpt[all] @ git+https://github.com/mtasic85/litgpt.git
|
10 |
litdata
|
11 |
grokadamw
|