ZeroUniqueness committed on
Commit
dab7aaa
·
1 Parent(s): c59800f

Training in progress, step 24000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09fbcc84f52583b6b795b011e1c5f0c43a3db60e66b9f20591fcc7e4f4c5a5c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
{checkpoint-20000 → checkpoint-23000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-20000 → checkpoint-23000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-20000 → checkpoint-23000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1e03ed4e6123418bf0d0eb5aa75d959570f0aeb7b2fc39e2bb25599324a44b
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09fbcc84f52583b6b795b011e1c5f0c43a3db60e66b9f20591fcc7e4f4c5a5c
3
  size 500897101
{checkpoint-20000/adapter_model → checkpoint-24000}/README.md RENAMED
File without changes
{checkpoint-20000/adapter_model → checkpoint-24000}/adapter_config.json RENAMED
File without changes
{checkpoint-20000/adapter_model → checkpoint-24000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1e03ed4e6123418bf0d0eb5aa75d959570f0aeb7b2fc39e2bb25599324a44b
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
{checkpoint-20000 → checkpoint-24000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:672e6e99ca3330c95b0226b42d37365aee4e2e1fd37665dc8a1baea4285d85ce
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c516a1cda2f637391a4cc14345088f2c9231c0a65bc15aa25db213427b391e
3
  size 1001723453
{checkpoint-20000 → checkpoint-24000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9379dd24f4bc7f42c02a2395c82177694b882887f5289666a7fc40b3c707700a
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6a0c20bcdf319a5ffb4a98f7ca699fd1efef2927f5c6f105ca13297f084ba02
3
  size 14575
{checkpoint-20000 → checkpoint-24000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6540ccb807cc963275480e497de74768a480fbf46bd84a7c5eeaa24e5c7b2503
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd7a0be07ab26faaad77ee68b49800320fcb18c774f44c984bf819516ecfa521
3
  size 627
{checkpoint-20000 → checkpoint-24000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6324757933616638,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-20000",
4
- "epoch": 0.7456843518138772,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1366,11 +1366,283 @@
1366
  "eval_samples_per_second": 0.428,
1367
  "eval_steps_per_second": 0.428,
1368
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1369
  }
1370
  ],
1371
  "max_steps": 80463,
1372
  "num_train_epochs": 3,
1373
- "total_flos": 5.606446667012506e+18,
1374
  "trial_name": null,
1375
  "trial_params": null
1376
  }
 
1
  {
2
+ "best_metric": 0.6122664213180542,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-24000",
4
+ "epoch": 0.8948212221766526,
5
+ "global_step": 24000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1366
  "eval_samples_per_second": 0.428,
1367
  "eval_steps_per_second": 0.428,
1368
  "step": 20000
1369
+ },
1370
+ {
1371
+ "epoch": 0.75,
1372
+ "learning_rate": 0.0001707748460945171,
1373
+ "loss": 0.5953,
1374
+ "step": 20100
1375
+ },
1376
+ {
1377
+ "epoch": 0.75,
1378
+ "learning_rate": 0.0001704984413287202,
1379
+ "loss": 0.6329,
1380
+ "step": 20200
1381
+ },
1382
+ {
1383
+ "epoch": 0.76,
1384
+ "learning_rate": 0.00017022096159817493,
1385
+ "loss": 0.6227,
1386
+ "step": 20300
1387
+ },
1388
+ {
1389
+ "epoch": 0.76,
1390
+ "learning_rate": 0.00016994241113391003,
1391
+ "loss": 0.6022,
1392
+ "step": 20400
1393
+ },
1394
+ {
1395
+ "epoch": 0.76,
1396
+ "learning_rate": 0.0001696627941832808,
1397
+ "loss": 0.604,
1398
+ "step": 20500
1399
+ },
1400
+ {
1401
+ "epoch": 0.77,
1402
+ "learning_rate": 0.0001693821150099044,
1403
+ "loss": 0.6101,
1404
+ "step": 20600
1405
+ },
1406
+ {
1407
+ "epoch": 0.77,
1408
+ "learning_rate": 0.00016910037789359485,
1409
+ "loss": 0.6242,
1410
+ "step": 20700
1411
+ },
1412
+ {
1413
+ "epoch": 0.78,
1414
+ "learning_rate": 0.00016881758713029776,
1415
+ "loss": 0.6096,
1416
+ "step": 20800
1417
+ },
1418
+ {
1419
+ "epoch": 0.78,
1420
+ "learning_rate": 0.0001685337470320248,
1421
+ "loss": 0.5948,
1422
+ "step": 20900
1423
+ },
1424
+ {
1425
+ "epoch": 0.78,
1426
+ "learning_rate": 0.0001682488619267879,
1427
+ "loss": 0.5911,
1428
+ "step": 21000
1429
+ },
1430
+ {
1431
+ "epoch": 0.78,
1432
+ "eval_loss": 0.6282580494880676,
1433
+ "eval_runtime": 1313.1215,
1434
+ "eval_samples_per_second": 0.413,
1435
+ "eval_steps_per_second": 0.413,
1436
+ "step": 21000
1437
+ },
1438
+ {
1439
+ "epoch": 0.79,
1440
+ "learning_rate": 0.0001679629361585335,
1441
+ "loss": 0.5716,
1442
+ "step": 21100
1443
+ },
1444
+ {
1445
+ "epoch": 0.79,
1446
+ "learning_rate": 0.00016767597408707594,
1447
+ "loss": 0.5957,
1448
+ "step": 21200
1449
+ },
1450
+ {
1451
+ "epoch": 0.79,
1452
+ "learning_rate": 0.00016738798008803128,
1453
+ "loss": 0.6308,
1454
+ "step": 21300
1455
+ },
1456
+ {
1457
+ "epoch": 0.8,
1458
+ "learning_rate": 0.00016709895855275048,
1459
+ "loss": 0.5891,
1460
+ "step": 21400
1461
+ },
1462
+ {
1463
+ "epoch": 0.8,
1464
+ "learning_rate": 0.00016680891388825243,
1465
+ "loss": 0.6104,
1466
+ "step": 21500
1467
+ },
1468
+ {
1469
+ "epoch": 0.81,
1470
+ "learning_rate": 0.00016651785051715674,
1471
+ "loss": 0.6344,
1472
+ "step": 21600
1473
+ },
1474
+ {
1475
+ "epoch": 0.81,
1476
+ "learning_rate": 0.0001662257728776163,
1477
+ "loss": 0.604,
1478
+ "step": 21700
1479
+ },
1480
+ {
1481
+ "epoch": 0.81,
1482
+ "learning_rate": 0.0001659326854232497,
1483
+ "loss": 0.6066,
1484
+ "step": 21800
1485
+ },
1486
+ {
1487
+ "epoch": 0.82,
1488
+ "learning_rate": 0.0001656385926230732,
1489
+ "loss": 0.6324,
1490
+ "step": 21900
1491
+ },
1492
+ {
1493
+ "epoch": 0.82,
1494
+ "learning_rate": 0.00016534349896143264,
1495
+ "loss": 0.5819,
1496
+ "step": 22000
1497
+ },
1498
+ {
1499
+ "epoch": 0.82,
1500
+ "eval_loss": 0.6218891143798828,
1501
+ "eval_runtime": 1296.6038,
1502
+ "eval_samples_per_second": 0.418,
1503
+ "eval_steps_per_second": 0.418,
1504
+ "step": 22000
1505
+ },
1506
+ {
1507
+ "epoch": 0.82,
1508
+ "learning_rate": 0.00016504740893793512,
1509
+ "loss": 0.6145,
1510
+ "step": 22100
1511
+ },
1512
+ {
1513
+ "epoch": 0.83,
1514
+ "learning_rate": 0.00016475032706738023,
1515
+ "loss": 0.6109,
1516
+ "step": 22200
1517
+ },
1518
+ {
1519
+ "epoch": 0.83,
1520
+ "learning_rate": 0.0001644522578796914,
1521
+ "loss": 0.608,
1522
+ "step": 22300
1523
+ },
1524
+ {
1525
+ "epoch": 0.84,
1526
+ "learning_rate": 0.0001641532059198466,
1527
+ "loss": 0.565,
1528
+ "step": 22400
1529
+ },
1530
+ {
1531
+ "epoch": 0.84,
1532
+ "learning_rate": 0.00016385317574780942,
1533
+ "loss": 0.6139,
1534
+ "step": 22500
1535
+ },
1536
+ {
1537
+ "epoch": 0.84,
1538
+ "learning_rate": 0.000163552171938459,
1539
+ "loss": 0.5888,
1540
+ "step": 22600
1541
+ },
1542
+ {
1543
+ "epoch": 0.85,
1544
+ "learning_rate": 0.00016325019908152078,
1545
+ "loss": 0.6065,
1546
+ "step": 22700
1547
+ },
1548
+ {
1549
+ "epoch": 0.85,
1550
+ "learning_rate": 0.0001629472617814962,
1551
+ "loss": 0.5959,
1552
+ "step": 22800
1553
+ },
1554
+ {
1555
+ "epoch": 0.85,
1556
+ "learning_rate": 0.00016264336465759258,
1557
+ "loss": 0.5918,
1558
+ "step": 22900
1559
+ },
1560
+ {
1561
+ "epoch": 0.86,
1562
+ "learning_rate": 0.0001623385123436528,
1563
+ "loss": 0.6083,
1564
+ "step": 23000
1565
+ },
1566
+ {
1567
+ "epoch": 0.86,
1568
+ "eval_loss": 0.6180054545402527,
1569
+ "eval_runtime": 1278.5639,
1570
+ "eval_samples_per_second": 0.424,
1571
+ "eval_steps_per_second": 0.424,
1572
+ "step": 23000
1573
+ },
1574
+ {
1575
+ "epoch": 0.86,
1576
+ "learning_rate": 0.0001620327094880844,
1577
+ "loss": 0.5795,
1578
+ "step": 23100
1579
+ },
1580
+ {
1581
+ "epoch": 0.86,
1582
+ "learning_rate": 0.00016172596075378893,
1583
+ "loss": 0.6025,
1584
+ "step": 23200
1585
+ },
1586
+ {
1587
+ "epoch": 0.87,
1588
+ "learning_rate": 0.00016141827081809075,
1589
+ "loss": 0.5669,
1590
+ "step": 23300
1591
+ },
1592
+ {
1593
+ "epoch": 0.87,
1594
+ "learning_rate": 0.00016110964437266568,
1595
+ "loss": 0.6172,
1596
+ "step": 23400
1597
+ },
1598
+ {
1599
+ "epoch": 0.88,
1600
+ "learning_rate": 0.00016080008612346955,
1601
+ "loss": 0.5899,
1602
+ "step": 23500
1603
+ },
1604
+ {
1605
+ "epoch": 0.88,
1606
+ "learning_rate": 0.00016048960079066636,
1607
+ "loss": 0.5889,
1608
+ "step": 23600
1609
+ },
1610
+ {
1611
+ "epoch": 0.88,
1612
+ "learning_rate": 0.00016017819310855632,
1613
+ "loss": 0.5893,
1614
+ "step": 23700
1615
+ },
1616
+ {
1617
+ "epoch": 0.89,
1618
+ "learning_rate": 0.00015986586782550376,
1619
+ "loss": 0.6363,
1620
+ "step": 23800
1621
+ },
1622
+ {
1623
+ "epoch": 0.89,
1624
+ "learning_rate": 0.00015955262970386458,
1625
+ "loss": 0.5876,
1626
+ "step": 23900
1627
+ },
1628
+ {
1629
+ "epoch": 0.89,
1630
+ "learning_rate": 0.00015923848351991372,
1631
+ "loss": 0.5964,
1632
+ "step": 24000
1633
+ },
1634
+ {
1635
+ "epoch": 0.89,
1636
+ "eval_loss": 0.6122664213180542,
1637
+ "eval_runtime": 1255.2341,
1638
+ "eval_samples_per_second": 0.432,
1639
+ "eval_steps_per_second": 0.432,
1640
+ "step": 24000
1641
  }
1642
  ],
1643
  "max_steps": 80463,
1644
  "num_train_epochs": 3,
1645
+ "total_flos": 6.727787581059072e+18,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }
{checkpoint-20000 → checkpoint-24000}/training_args.bin RENAMED
File without changes