MicroPanda123
committed on
Commit
·
a40e958
1
Parent(s):
a8b5b46
Newer model
Browse files
ckpt.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2300f0f0262b7a6f0cda7161cfe336de442b23c248e0d4c8f6fb934d6156d907
|
3 |
+
size 1492571162
|
info.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
iter_num: 9760
|
2 |
+
train_loss: 0.7207
|
3 |
+
val_loss: 1.2744
|
4 |
+
config: {'out_dir': 'out-python', 'eval_interval': 20, 'log_interval': 1, 'eval_iters': 40, 'eval_only': False, 'always_save_checkpoint': True, 'init_from': 'resume', 'model_interval': 200, 'wandb_log': False, 'wandb_project': 'shakespeare', 'wandb_run_name': 'ft-1689354706.0141432', 'dataset': 'python', 'gradient_accumulation_steps': 64, 'batch_size': 2, 'block_size': 1024, 'n_layer': 12, 'n_head': 12, 'n_embd': 768, 'dropout': 0.0, 'bias': False, 'learning_rate': 0.0006, 'max_iters': 600000, 'weight_decay': 0.1, 'beta1': 0.9, 'beta2': 0.95, 'grad_clip': 1.0, 'decay_lr': True, 'warmup_iters': 2000, 'lr_decay_iters': 600000, 'min_lr': 6e-05, 'backend': 'nccl', 'device': 'cuda', 'dtype': 'float16', 'compile': True}
|