{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 0.6611918807029724,
      "learning_rate": 5.319148936170213e-05,
      "loss": 0.6932,
      "step": 25
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.9467485547065735,
      "learning_rate": 0.00010638297872340425,
      "loss": 0.6912,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9465051889419556,
      "learning_rate": 0.00015957446808510637,
      "loss": 0.6893,
      "step": 75
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5430934429168701,
      "learning_rate": 0.0002127659574468085,
      "loss": 0.6804,
      "step": 100
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6103107929229736,
      "learning_rate": 0.00026595744680851064,
      "loss": 0.6806,
      "step": 125
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3773200511932373,
      "learning_rate": 0.00031914893617021275,
      "loss": 0.6801,
      "step": 150
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8832203149795532,
      "learning_rate": 0.0003723404255319149,
      "loss": 0.6791,
      "step": 175
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3350876569747925,
      "learning_rate": 0.0003992081821181128,
      "loss": 0.6746,
      "step": 200
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5796219110488892,
      "learning_rate": 0.0003975585615308479,
      "loss": 0.6771,
      "step": 225
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.884006381034851,
      "learning_rate": 0.00039590894094358297,
      "loss": 0.6649,
      "step": 250
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.286440372467041,
      "learning_rate": 0.0003942593203563181,
      "loss": 0.6388,
      "step": 275
    },
    {
      "epoch": 1.2,
      "grad_norm": 8.244946479797363,
      "learning_rate": 0.0003926096997690532,
      "loss": 0.6238,
      "step": 300
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.265683650970459,
      "learning_rate": 0.00039096007918178817,
      "loss": 0.6383,
      "step": 325
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.115826606750488,
      "learning_rate": 0.0003893104585945233,
      "loss": 0.6194,
      "step": 350
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.694250583648682,
      "learning_rate": 0.0003876608380072583,
      "loss": 0.6324,
      "step": 375
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.463121175765991,
      "learning_rate": 0.00038601121741999343,
      "loss": 0.621,
      "step": 400
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.582865238189697,
      "learning_rate": 0.0003843615968327285,
      "loss": 0.6116,
      "step": 425
    },
    {
      "epoch": 1.8,
      "grad_norm": 11.996281623840332,
      "learning_rate": 0.0003827119762454636,
      "loss": 0.6393,
      "step": 450
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0407373905181885,
      "learning_rate": 0.00038106235565819863,
      "loss": 0.628,
      "step": 475
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.917588233947754,
      "learning_rate": 0.0003794127350709337,
      "loss": 0.6078,
      "step": 500
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.748379707336426,
      "learning_rate": 0.0003777631144836688,
      "loss": 0.4899,
      "step": 525
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.8076977729797363,
      "learning_rate": 0.00037611349389640383,
      "loss": 0.5086,
      "step": 550
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.2440714836120605,
      "learning_rate": 0.00037446387330913894,
      "loss": 0.5327,
      "step": 575
    },
    {
      "epoch": 2.4,
      "grad_norm": 7.110438346862793,
      "learning_rate": 0.000372814252721874,
      "loss": 0.5436,
      "step": 600
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.46150541305542,
      "learning_rate": 0.00037116463213460903,
      "loss": 0.5294,
      "step": 625
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.136163234710693,
      "learning_rate": 0.00036951501154734414,
      "loss": 0.5245,
      "step": 650
    },
    {
      "epoch": 2.7,
      "grad_norm": 8.735346794128418,
      "learning_rate": 0.0003678653909600792,
      "loss": 0.5449,
      "step": 675
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.922825574874878,
      "learning_rate": 0.0003662157703728143,
      "loss": 0.5406,
      "step": 700
    },
    {
      "epoch": 2.9,
      "grad_norm": 7.744819641113281,
      "learning_rate": 0.00036456614978554934,
      "loss": 0.5447,
      "step": 725
    },
    {
      "epoch": 3.0,
      "grad_norm": 11.6185884475708,
      "learning_rate": 0.00036291652919828444,
      "loss": 0.5195,
      "step": 750
    },
    {
      "epoch": 3.1,
      "grad_norm": 3.73836088180542,
      "learning_rate": 0.00036126690861101944,
      "loss": 0.3824,
      "step": 775
    },
    {
      "epoch": 3.2,
      "grad_norm": 14.850343704223633,
      "learning_rate": 0.00035961728802375454,
      "loss": 0.4071,
      "step": 800
    },
    {
      "epoch": 3.3,
      "grad_norm": 5.7157440185546875,
      "learning_rate": 0.0003579676674364896,
      "loss": 0.3986,
      "step": 825
    },
    {
      "epoch": 3.4,
      "grad_norm": 12.418399810791016,
      "learning_rate": 0.0003563180468492247,
      "loss": 0.4282,
      "step": 850
    },
    {
      "epoch": 3.5,
      "grad_norm": 12.793001174926758,
      "learning_rate": 0.0003546684262619598,
      "loss": 0.4822,
      "step": 875
    },
    {
      "epoch": 3.6,
      "grad_norm": 6.489450931549072,
      "learning_rate": 0.00035301880567469485,
      "loss": 0.4239,
      "step": 900
    },
    {
      "epoch": 3.7,
      "grad_norm": 5.365822792053223,
      "learning_rate": 0.0003513691850874299,
      "loss": 0.421,
      "step": 925
    },
    {
      "epoch": 3.8,
      "grad_norm": 12.643745422363281,
      "learning_rate": 0.00034971956450016495,
      "loss": 0.3964,
      "step": 950
    },
    {
      "epoch": 3.9,
      "grad_norm": 14.334024429321289,
      "learning_rate": 0.00034806994391290005,
      "loss": 0.4634,
      "step": 975
    },
    {
      "epoch": 4.0,
      "grad_norm": 6.819091320037842,
      "learning_rate": 0.0003464203233256351,
      "loss": 0.4139,
      "step": 1000
    },
    {
      "epoch": 4.1,
      "grad_norm": 8.29238224029541,
      "learning_rate": 0.0003447707027383702,
      "loss": 0.2695,
      "step": 1025
    },
    {
      "epoch": 4.2,
      "grad_norm": 5.984206676483154,
      "learning_rate": 0.00034312108215110525,
      "loss": 0.2653,
      "step": 1050
    },
    {
      "epoch": 4.3,
      "grad_norm": 5.9425435066223145,
      "learning_rate": 0.0003414714615638403,
      "loss": 0.2982,
      "step": 1075
    },
    {
      "epoch": 4.4,
      "grad_norm": 7.1877593994140625,
      "learning_rate": 0.0003398218409765754,
      "loss": 0.3309,
      "step": 1100
    },
    {
      "epoch": 4.5,
      "grad_norm": 17.83046531677246,
      "learning_rate": 0.00033817222038931045,
      "loss": 0.3467,
      "step": 1125
    },
    {
      "epoch": 4.6,
      "grad_norm": 4.865128517150879,
      "learning_rate": 0.00033652259980204556,
      "loss": 0.3117,
      "step": 1150
    },
    {
      "epoch": 4.7,
      "grad_norm": 31.427154541015625,
      "learning_rate": 0.0003348729792147806,
      "loss": 0.3273,
      "step": 1175
    },
    {
      "epoch": 4.8,
      "grad_norm": 26.77984619140625,
      "learning_rate": 0.0003332233586275157,
      "loss": 0.3504,
      "step": 1200
    },
    {
      "epoch": 4.9,
      "grad_norm": 9.389993667602539,
      "learning_rate": 0.00033157373804025076,
      "loss": 0.3339,
      "step": 1225
    },
    {
      "epoch": 5.0,
      "grad_norm": 25.70441246032715,
      "learning_rate": 0.0003299241174529858,
      "loss": 0.3336,
      "step": 1250
    },
    {
      "epoch": 5.1,
      "grad_norm": 13.482085227966309,
      "learning_rate": 0.0003282744968657209,
      "loss": 0.2156,
      "step": 1275
    },
    {
      "epoch": 5.2,
      "grad_norm": 8.725810050964355,
      "learning_rate": 0.00032662487627845596,
      "loss": 0.1811,
      "step": 1300
    },
    {
      "epoch": 5.3,
      "grad_norm": 9.313215255737305,
      "learning_rate": 0.00032497525569119106,
      "loss": 0.2807,
      "step": 1325
    },
    {
      "epoch": 5.4,
      "grad_norm": 11.026411056518555,
      "learning_rate": 0.0003233256351039261,
      "loss": 0.2757,
      "step": 1350
    },
    {
      "epoch": 5.5,
      "grad_norm": 11.038985252380371,
      "learning_rate": 0.00032167601451666116,
      "loss": 0.2177,
      "step": 1375
    },
    {
      "epoch": 5.6,
      "grad_norm": 4.008651256561279,
      "learning_rate": 0.00032002639392939627,
      "loss": 0.2163,
      "step": 1400
    },
    {
      "epoch": 5.7,
      "grad_norm": 12.480770111083984,
      "learning_rate": 0.0003183767733421313,
      "loss": 0.2173,
      "step": 1425
    },
    {
      "epoch": 5.8,
      "grad_norm": 8.751969337463379,
      "learning_rate": 0.0003167271527548664,
      "loss": 0.2299,
      "step": 1450
    },
    {
      "epoch": 5.9,
      "grad_norm": 7.701971530914307,
      "learning_rate": 0.00031507753216760147,
      "loss": 0.1949,
      "step": 1475
    },
    {
      "epoch": 6.0,
      "grad_norm": 8.48027515411377,
      "learning_rate": 0.00031342791158033657,
      "loss": 0.2599,
      "step": 1500
    },
    {
      "epoch": 6.1,
      "grad_norm": 9.29404067993164,
      "learning_rate": 0.00031177829099307157,
      "loss": 0.1376,
      "step": 1525
    },
    {
      "epoch": 6.2,
      "grad_norm": 20.137714385986328,
      "learning_rate": 0.00031012867040580667,
      "loss": 0.1647,
      "step": 1550
    },
    {
      "epoch": 6.3,
      "grad_norm": 11.394575119018555,
      "learning_rate": 0.0003084790498185417,
      "loss": 0.1565,
      "step": 1575
    },
    {
      "epoch": 6.4,
      "grad_norm": 8.214287757873535,
      "learning_rate": 0.0003068294292312768,
      "loss": 0.1739,
      "step": 1600
    },
    {
      "epoch": 6.5,
      "grad_norm": 7.779988765716553,
      "learning_rate": 0.0003051798086440119,
      "loss": 0.1403,
      "step": 1625
    },
    {
      "epoch": 6.6,
      "grad_norm": 9.421648025512695,
      "learning_rate": 0.000303530188056747,
      "loss": 0.1545,
      "step": 1650
    },
    {
      "epoch": 6.7,
      "grad_norm": 5.751734256744385,
      "learning_rate": 0.000301880567469482,
      "loss": 0.1971,
      "step": 1675
    },
    {
      "epoch": 6.8,
      "grad_norm": 23.861705780029297,
      "learning_rate": 0.0003002309468822171,
      "loss": 0.1681,
      "step": 1700
    },
    {
      "epoch": 6.9,
      "grad_norm": 18.944721221923828,
      "learning_rate": 0.0002985813262949522,
      "loss": 0.1703,
      "step": 1725
    },
    {
      "epoch": 7.0,
      "grad_norm": 14.045795440673828,
      "learning_rate": 0.00029693170570768723,
      "loss": 0.1801,
      "step": 1750
    },
    {
      "epoch": 7.1,
      "grad_norm": 6.4620137214660645,
      "learning_rate": 0.00029528208512042233,
      "loss": 0.1253,
      "step": 1775
    },
    {
      "epoch": 7.2,
      "grad_norm": 4.318169593811035,
      "learning_rate": 0.0002936324645331574,
      "loss": 0.1397,
      "step": 1800
    },
    {
      "epoch": 7.3,
      "grad_norm": 24.91462516784668,
      "learning_rate": 0.00029198284394589243,
      "loss": 0.1259,
      "step": 1825
    },
    {
      "epoch": 7.4,
      "grad_norm": 23.614572525024414,
      "learning_rate": 0.00029033322335862753,
      "loss": 0.1293,
      "step": 1850
    },
    {
      "epoch": 7.5,
      "grad_norm": 3.60048508644104,
      "learning_rate": 0.0002886836027713626,
      "loss": 0.138,
      "step": 1875
    },
    {
      "epoch": 7.6,
      "grad_norm": 16.62705421447754,
      "learning_rate": 0.0002870339821840977,
      "loss": 0.1733,
      "step": 1900
    },
    {
      "epoch": 7.7,
      "grad_norm": 33.79671859741211,
      "learning_rate": 0.00028538436159683273,
      "loss": 0.217,
      "step": 1925
    },
    {
      "epoch": 7.8,
      "grad_norm": 9.69206428527832,
      "learning_rate": 0.00028373474100956784,
      "loss": 0.1289,
      "step": 1950
    },
    {
      "epoch": 7.9,
      "grad_norm": 18.655046463012695,
      "learning_rate": 0.0002820851204223029,
      "loss": 0.1168,
      "step": 1975
    },
    {
      "epoch": 8.0,
      "grad_norm": 8.4110746383667,
      "learning_rate": 0.00028043549983503794,
      "loss": 0.1471,
      "step": 2000
    }
  ],
  "logging_steps": 25,
  "max_steps": 6250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 101638963200000.0,
  "train_batch_size": 20,
  "trial_name": null,
  "trial_params": null
}