akhooli/nli-500k-triplets-MB
Browse files
README.md
CHANGED
@@ -281,7 +281,6 @@ You can finetune this model on your own dataset.
|
|
281 |
- `per_device_eval_batch_size`: 12
|
282 |
- `learning_rate`: 2e-05
|
283 |
- `num_train_epochs`: 1
|
284 |
-
- `max_steps`: 500
|
285 |
- `warmup_ratio`: 0.1
|
286 |
- `fp16`: True
|
287 |
- `batch_sampler`: no_duplicates
|
@@ -307,7 +306,7 @@ You can finetune this model on your own dataset.
|
|
307 |
- `adam_epsilon`: 1e-08
|
308 |
- `max_grad_norm`: 1.0
|
309 |
- `num_train_epochs`: 1
|
310 |
-
- `max_steps`: 500
|
311 |
- `lr_scheduler_type`: linear
|
312 |
- `lr_scheduler_kwargs`: {}
|
313 |
- `warmup_ratio`: 0.1
|
@@ -408,10 +407,96 @@ You can finetune this model on your own dataset.
|
|
408 |
</details>
|
409 |
|
410 |
### Training Logs
|
411 |
-
| Epoch | Step
|
412 |
-
|
413 |
-
| 0.0113 | 250
|
414 |
-
| 0.0226 | 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
|
416 |
|
417 |
### Framework Versions
|
@@ -420,7 +505,7 @@ You can finetune this model on your own dataset.
|
|
420 |
- Transformers: 4.48.0
|
421 |
- PyTorch: 2.5.1+cu121
|
422 |
- Accelerate: 0.34.2
|
423 |
-
- Datasets: 3.0.
|
424 |
- Tokenizers: 0.21.0
|
425 |
|
426 |
## Citation
|
|
|
281 |
- `per_device_eval_batch_size`: 12
|
282 |
- `learning_rate`: 2e-05
|
283 |
- `num_train_epochs`: 1
|
|
|
284 |
- `warmup_ratio`: 0.1
|
285 |
- `fp16`: True
|
286 |
- `batch_sampler`: no_duplicates
|
|
|
306 |
- `adam_epsilon`: 1e-08
|
307 |
- `max_grad_norm`: 1.0
|
308 |
- `num_train_epochs`: 1
|
309 |
+
- `max_steps`: -1
|
310 |
- `lr_scheduler_type`: linear
|
311 |
- `lr_scheduler_kwargs`: {}
|
312 |
- `warmup_ratio`: 0.1
|
|
|
407 |
</details>
|
408 |
|
409 |
### Training Logs
|
410 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
411 |
+
|:------:|:-----:|:-------------:|:---------------:|
|
412 |
+
| 0.0113 | 250 | 6.9693 | - |
|
413 |
+
| 0.0226 | 500 | 4.6555 | - |
|
414 |
+
| 0.0340 | 750 | 3.6612 | - |
|
415 |
+
| 0.0453 | 1000 | 3.3137 | - |
|
416 |
+
| 0.0566 | 1250 | 3.0655 | - |
|
417 |
+
| 0.0679 | 1500 | 2.9465 | - |
|
418 |
+
| 0.0792 | 1750 | 2.9436 | - |
|
419 |
+
| 0.0906 | 2000 | 2.7902 | - |
|
420 |
+
| 0.1019 | 2250 | 2.7131 | - |
|
421 |
+
| 0.1132 | 2500 | 2.7388 | - |
|
422 |
+
| 0.1245 | 2750 | 2.7474 | - |
|
423 |
+
| 0.1359 | 3000 | 2.5196 | - |
|
424 |
+
| 0.1472 | 3250 | 2.4522 | - |
|
425 |
+
| 0.1585 | 3500 | 2.4588 | - |
|
426 |
+
| 0.1698 | 3750 | 2.458 | - |
|
427 |
+
| 0.1811 | 4000 | 2.3643 | - |
|
428 |
+
| 0.1925 | 4250 | 2.2747 | - |
|
429 |
+
| 0.2038 | 4500 | 2.1526 | - |
|
430 |
+
| 0.2151 | 4750 | 2.0504 | - |
|
431 |
+
| 0.2264 | 5000 | 2.1278 | 2.1682 |
|
432 |
+
| 0.2377 | 5250 | 2.0536 | - |
|
433 |
+
| 0.2491 | 5500 | 2.0332 | - |
|
434 |
+
| 0.2604 | 5750 | 1.9816 | - |
|
435 |
+
| 0.2717 | 6000 | 1.8878 | - |
|
436 |
+
| 0.2830 | 6250 | 1.8733 | - |
|
437 |
+
| 0.2943 | 6500 | 1.8573 | - |
|
438 |
+
| 0.3057 | 6750 | 1.9132 | - |
|
439 |
+
| 0.3170 | 7000 | 1.7868 | - |
|
440 |
+
| 0.3283 | 7250 | 1.7047 | - |
|
441 |
+
| 0.3396 | 7500 | 1.836 | - |
|
442 |
+
| 0.3509 | 7750 | 1.7552 | - |
|
443 |
+
| 0.3623 | 8000 | 1.6976 | - |
|
444 |
+
| 0.3736 | 8250 | 1.7005 | - |
|
445 |
+
| 0.3849 | 8500 | 1.7418 | - |
|
446 |
+
| 0.3962 | 8750 | 1.6407 | - |
|
447 |
+
| 0.4076 | 9000 | 1.6039 | - |
|
448 |
+
| 0.4189 | 9250 | 1.6287 | - |
|
449 |
+
| 0.4302 | 9500 | 1.5528 | - |
|
450 |
+
| 0.4415 | 9750 | 1.5981 | - |
|
451 |
+
| 0.4528 | 10000 | 1.5705 | 1.6057 |
|
452 |
+
| 0.4642 | 10250 | 1.5245 | - |
|
453 |
+
| 0.4755 | 10500 | 1.444 | - |
|
454 |
+
| 0.4868 | 10750 | 1.4127 | - |
|
455 |
+
| 0.4981 | 11000 | 1.5006 | - |
|
456 |
+
| 0.5094 | 11250 | 1.3875 | - |
|
457 |
+
| 0.5208 | 11500 | 1.3678 | - |
|
458 |
+
| 0.5321 | 11750 | 1.4748 | - |
|
459 |
+
| 0.5434 | 12000 | 1.4333 | - |
|
460 |
+
| 0.5547 | 12250 | 1.4464 | - |
|
461 |
+
| 0.5660 | 12500 | 1.3053 | - |
|
462 |
+
| 0.5774 | 12750 | 1.3522 | - |
|
463 |
+
| 0.5887 | 13000 | 1.2708 | - |
|
464 |
+
| 0.6000 | 13250 | 1.3063 | - |
|
465 |
+
| 0.6113 | 13500 | 1.329 | - |
|
466 |
+
| 0.6227 | 13750 | 1.2761 | - |
|
467 |
+
| 0.6340 | 14000 | 1.303 | - |
|
468 |
+
| 0.6453 | 14250 | 1.3323 | - |
|
469 |
+
| 0.6566 | 14500 | 1.2228 | - |
|
470 |
+
| 0.6679 | 14750 | 1.2488 | - |
|
471 |
+
| 0.6793 | 15000 | 1.2635 | 1.2501 |
|
472 |
+
| 0.6906 | 15250 | 1.2333 | - |
|
473 |
+
| 0.7019 | 15500 | 1.203 | - |
|
474 |
+
| 0.7132 | 15750 | 1.1859 | - |
|
475 |
+
| 0.7245 | 16000 | 1.166 | - |
|
476 |
+
| 0.7359 | 16250 | 1.1935 | - |
|
477 |
+
| 0.7472 | 16500 | 1.1466 | - |
|
478 |
+
| 0.7585 | 16750 | 1.2175 | - |
|
479 |
+
| 0.7698 | 17000 | 1.1073 | - |
|
480 |
+
| 0.7811 | 17250 | 1.1363 | - |
|
481 |
+
| 0.7925 | 17500 | 1.2153 | - |
|
482 |
+
| 0.8038 | 17750 | 1.1302 | - |
|
483 |
+
| 0.8151 | 18000 | 1.0426 | - |
|
484 |
+
| 0.8264 | 18250 | 1.0576 | - |
|
485 |
+
| 0.8377 | 18500 | 1.0944 | - |
|
486 |
+
| 0.8491 | 18750 | 1.0158 | - |
|
487 |
+
| 0.8604 | 19000 | 1.1024 | - |
|
488 |
+
| 0.8717 | 19250 | 1.0598 | - |
|
489 |
+
| 0.8830 | 19500 | 1.0636 | - |
|
490 |
+
| 0.8944 | 19750 | 1.0052 | - |
|
491 |
+
| 0.9057 | 20000 | 1.0509 | 1.0778 |
|
492 |
+
| 0.9170 | 20250 | 1.0107 | - |
|
493 |
+
| 0.9283 | 20500 | 0.9524 | - |
|
494 |
+
| 0.9396 | 20750 | 1.0021 | - |
|
495 |
+
| 0.9510 | 21000 | 1.0262 | - |
|
496 |
+
| 0.9623 | 21250 | 0.9597 | - |
|
497 |
+
| 0.9736 | 21500 | 0.9471 | - |
|
498 |
+
| 0.9849 | 21750 | 1.0467 | - |
|
499 |
+
| 0.9962 | 22000 | 1.051 | - |
|
500 |
|
501 |
|
502 |
### Framework Versions
|
|
|
505 |
- Transformers: 4.48.0
|
506 |
- PyTorch: 2.5.1+cu121
|
507 |
- Accelerate: 0.34.2
|
508 |
+
- Datasets: 3.0.1
|
509 |
- Tokenizers: 0.21.0
|
510 |
|
511 |
## Citation
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 596070136
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f92d69b7df664f82568c3d7637f84d55d7070729d683e18d8b3b8f9e498156e
|
3 |
size 596070136
|
runs/Jan15_08-03-45_21ac67580ab7/events.out.tfevents.1736928228.21ac67580ab7.99.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:260c123fa491a2820a1ff1d5a1266da89326ff120c3bda2836fe50ead3f1e400
|
3 |
+
size 24545
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10148b8160d80da588a514decb1807d3d9168d3a4a81000638093b3acb0983a6
|
3 |
size 5688
|