ivanovsdesign committed
Commit • fdcd368
Parent(s): 0be7ef3
Upload folder using huggingface_hub

Files changed:
- README.md +36 -0
- checkpoint-7302/config.json +49 -0
- checkpoint-7302/model.safetensors +3 -0
- checkpoint-7302/optimizer.pt +3 -0
- checkpoint-7302/rng_state_0.pth +3 -0
- checkpoint-7302/rng_state_1.pth +3 -0
- checkpoint-7302/scheduler.pt +3 -0
- checkpoint-7302/trainer_state.json +2140 -0
- checkpoint-7302/training_args.bin +3 -0
- config.json +49 -0
- model.safetensors +3 -0
- runs/Nov09_23-13-04_6604152ce143/events.out.tfevents.1731193985.6604152ce143.674.0 +2 -2
- runs/Nov09_23-13-04_6604152ce143/events.out.tfevents.1731197419.6604152ce143.674.1 +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +64 -0
- training_args.bin +3 -0
- training_params.json +30 -0
- vocab.txt +0 -0
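
The commit message says the folder was uploaded with huggingface_hub. A minimal sketch of how such an upload is typically produced follows; the local folder name and target repo id are assumptions for illustration and are not stated on this page.

```python
# Sketch: push a local training output folder to the Hub with huggingface_hub.
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="rubert-base-ru-huawei-sentiment-fine-up",               # assumed local dir
    repo_id="ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up",     # assumed repo id
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```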
README.md
ADDED
@@ -0,0 +1,36 @@
---
tags:
- autotrain
- text-classification
base_model: ivanovsdesign/huawei-data-classification
widget:
- text: "I love AutoTrain"
---

# Model Trained Using AutoTrain

- Problem type: Text Classification

## Validation Metrics

loss: 0.9126833081245422
f1_macro: 0.47986378330102575
f1_micro: 0.6351587383129559
f1_weighted: 0.6366218608153963
precision_macro: 0.4869491643480056
precision_micro: 0.6351587383129559
precision_weighted: 0.6494956994259575
recall_macro: 0.4963958102076925
recall_micro: 0.6351587383129559
recall_weighted: 0.6351587383129559
accuracy: 0.6351587383129559
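
For reference, a minimal usage sketch for the uploaded text-classification model. The repo id is an assumption inferred from the checkpoint name in trainer_state.json below; it is not confirmed on this page.

```python
# Sketch: run the fine-tuned classifier; labels "1".."5" come from config.json below.
from transformers import pipeline

clf = pipeline(
    "text-classification",
    model="ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up",  # assumed repo id
)
print(clf("I love AutoTrain"))  # e.g. [{'label': '5', 'score': ...}]
```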
checkpoint-7302/config.json
ADDED
@@ -0,0 +1,49 @@
{
  "_name_or_path": "ivanovsdesign/huawei-data-classification",
  "_num_labels": 5,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "1",
    "1": "2",
    "2": "3",
    "3": "4",
    "4": "5"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "1": 0,
    "2": 1,
    "3": 2,
    "4": 3,
    "5": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.46.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 119547
}
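
The config maps class indices 0-4 to the string labels "1"-"5" (id2label/label2id). A small sketch of applying that mapping to raw logits; the repo id is the same assumption as above.

```python
# Sketch: map the argmax over logits to the "1".."5" labels defined in id2label.
import torch
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

repo = "ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up"  # assumed repo id
config = AutoConfig.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForSequenceClassification.from_pretrained(repo)

inputs = tokenizer("I love AutoTrain", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits      # shape: (1, 5), one score per class
pred_id = int(logits.argmax(dim=-1))     # class index in 0..4
print(config.id2label[pred_id])          # prints one of "1".."5"
```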
checkpoint-7302/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1482e954dc7bbef048c49c06bdc65118b9138ec4ca7e91f7933627bf7f2df339
size 711452684
checkpoint-7302/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d34860e072b3e89ae53915f271347e78ded67fb3cfb4a99a3e2e844495bcacb1
size 1423026426
checkpoint-7302/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0854bb32a2482ca3dbd6c9360ad92f55abbc1c6a7252c1425c386b9e1c0ba633
size 14512
checkpoint-7302/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6c8952e15270b4d4ee66d9d14430aadfa68e894b1fcb2fb7df94f6fc7998bf10
size 14512
checkpoint-7302/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f73a56f8280d093a2719832534cf685e9b73a8df36f27a7e25f92295dcdeb2da
size 1064
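
The *.safetensors/*.pt/*.pth entries above are Git LFS pointer files (spec version, sha256 oid, byte size); the binaries themselves live in LFS storage. A minimal sketch of fetching one of them with huggingface_hub, under the same assumed repo id as above:

```python
# Sketch: resolve the LFS pointer to the actual weights file via the Hub.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up",  # assumed repo id
    filename="checkpoint-7302/model.safetensors",
)
print(path)  # local cache path of the ~711 MB weights file
```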
checkpoint-7302/trainer_state.json
ADDED
@@ -0,0 +1,2140 @@
1 |
+
{
|
2 |
+
"best_metric": 0.8904216289520264,
|
3 |
+
"best_model_checkpoint": "rubert-base-ru-huawei-sentiment-fine-up/checkpoint-7302",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 7302,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.010271158586688579,
|
13 |
+
"grad_norm": 5.9403300285339355,
|
14 |
+
"learning_rate": 5.705157462345961e-07,
|
15 |
+
"loss": 0.7105,
|
16 |
+
"step": 25
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.020542317173377157,
|
20 |
+
"grad_norm": 10.47209644317627,
|
21 |
+
"learning_rate": 1.1410314924691921e-06,
|
22 |
+
"loss": 0.77,
|
23 |
+
"step": 50
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.030813475760065736,
|
27 |
+
"grad_norm": 29.545536041259766,
|
28 |
+
"learning_rate": 1.7115472387037881e-06,
|
29 |
+
"loss": 0.6779,
|
30 |
+
"step": 75
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.041084634346754315,
|
34 |
+
"grad_norm": 8.517972946166992,
|
35 |
+
"learning_rate": 2.2820629849383843e-06,
|
36 |
+
"loss": 0.6978,
|
37 |
+
"step": 100
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.05135579293344289,
|
41 |
+
"grad_norm": 5.000863075256348,
|
42 |
+
"learning_rate": 2.8525787311729803e-06,
|
43 |
+
"loss": 0.7072,
|
44 |
+
"step": 125
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.06162695152013147,
|
48 |
+
"grad_norm": 6.92432165145874,
|
49 |
+
"learning_rate": 3.4230944774075762e-06,
|
50 |
+
"loss": 0.7056,
|
51 |
+
"step": 150
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.07189811010682005,
|
55 |
+
"grad_norm": 15.27511978149414,
|
56 |
+
"learning_rate": 3.993610223642173e-06,
|
57 |
+
"loss": 0.7201,
|
58 |
+
"step": 175
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.08216926869350863,
|
62 |
+
"grad_norm": 18.27556037902832,
|
63 |
+
"learning_rate": 4.564125969876769e-06,
|
64 |
+
"loss": 0.6529,
|
65 |
+
"step": 200
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.0924404272801972,
|
69 |
+
"grad_norm": 6.184650897979736,
|
70 |
+
"learning_rate": 5.111821086261981e-06,
|
71 |
+
"loss": 0.6994,
|
72 |
+
"step": 225
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.10271158586688578,
|
76 |
+
"grad_norm": 5.983186721801758,
|
77 |
+
"learning_rate": 5.682336832496577e-06,
|
78 |
+
"loss": 0.674,
|
79 |
+
"step": 250
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.11298274445357437,
|
83 |
+
"grad_norm": 10.52604866027832,
|
84 |
+
"learning_rate": 6.252852578731174e-06,
|
85 |
+
"loss": 0.6855,
|
86 |
+
"step": 275
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.12325390304026294,
|
90 |
+
"grad_norm": 7.054647922515869,
|
91 |
+
"learning_rate": 6.823368324965769e-06,
|
92 |
+
"loss": 0.6427,
|
93 |
+
"step": 300
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.13352506162695152,
|
97 |
+
"grad_norm": 10.486727714538574,
|
98 |
+
"learning_rate": 7.393884071200366e-06,
|
99 |
+
"loss": 0.7255,
|
100 |
+
"step": 325
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.1437962202136401,
|
104 |
+
"grad_norm": 4.579376697540283,
|
105 |
+
"learning_rate": 7.964399817434962e-06,
|
106 |
+
"loss": 0.5189,
|
107 |
+
"step": 350
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.15406737880032867,
|
111 |
+
"grad_norm": 5.1492414474487305,
|
112 |
+
"learning_rate": 8.534915563669557e-06,
|
113 |
+
"loss": 0.6923,
|
114 |
+
"step": 375
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.16433853738701726,
|
118 |
+
"grad_norm": 10.567380905151367,
|
119 |
+
"learning_rate": 9.105431309904154e-06,
|
120 |
+
"loss": 0.8093,
|
121 |
+
"step": 400
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.17460969597370585,
|
125 |
+
"grad_norm": 4.735847473144531,
|
126 |
+
"learning_rate": 9.67594705613875e-06,
|
127 |
+
"loss": 0.5677,
|
128 |
+
"step": 425
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.1848808545603944,
|
132 |
+
"grad_norm": 6.456740856170654,
|
133 |
+
"learning_rate": 1.0246462802373347e-05,
|
134 |
+
"loss": 0.6611,
|
135 |
+
"step": 450
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.195152013147083,
|
139 |
+
"grad_norm": 11.575152397155762,
|
140 |
+
"learning_rate": 1.0816978548607942e-05,
|
141 |
+
"loss": 0.7,
|
142 |
+
"step": 475
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.20542317173377156,
|
146 |
+
"grad_norm": 11.752349853515625,
|
147 |
+
"learning_rate": 1.1387494294842537e-05,
|
148 |
+
"loss": 0.6638,
|
149 |
+
"step": 500
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.21569433032046015,
|
153 |
+
"grad_norm": 21.79088020324707,
|
154 |
+
"learning_rate": 1.1958010041077134e-05,
|
155 |
+
"loss": 0.596,
|
156 |
+
"step": 525
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.22596548890714874,
|
160 |
+
"grad_norm": 6.109403610229492,
|
161 |
+
"learning_rate": 1.2528525787311731e-05,
|
162 |
+
"loss": 0.6791,
|
163 |
+
"step": 550
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.2362366474938373,
|
167 |
+
"grad_norm": 15.077224731445312,
|
168 |
+
"learning_rate": 1.3099041533546328e-05,
|
169 |
+
"loss": 0.7015,
|
170 |
+
"step": 575
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.2465078060805259,
|
174 |
+
"grad_norm": 9.261869430541992,
|
175 |
+
"learning_rate": 1.3669557279780923e-05,
|
176 |
+
"loss": 0.698,
|
177 |
+
"step": 600
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.25677896466721445,
|
181 |
+
"grad_norm": 7.057260036468506,
|
182 |
+
"learning_rate": 1.4240073026015518e-05,
|
183 |
+
"loss": 0.6274,
|
184 |
+
"step": 625
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.26705012325390304,
|
188 |
+
"grad_norm": 7.404267311096191,
|
189 |
+
"learning_rate": 1.4810588772250115e-05,
|
190 |
+
"loss": 0.6668,
|
191 |
+
"step": 650
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.2773212818405916,
|
195 |
+
"grad_norm": 9.93870735168457,
|
196 |
+
"learning_rate": 1.538110451848471e-05,
|
197 |
+
"loss": 0.7213,
|
198 |
+
"step": 675
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.2875924404272802,
|
202 |
+
"grad_norm": 7.443170547485352,
|
203 |
+
"learning_rate": 1.5951620264719307e-05,
|
204 |
+
"loss": 0.6831,
|
205 |
+
"step": 700
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.29786359901396875,
|
209 |
+
"grad_norm": 9.246779441833496,
|
210 |
+
"learning_rate": 1.6522136010953902e-05,
|
211 |
+
"loss": 0.7139,
|
212 |
+
"step": 725
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"epoch": 0.30813475760065734,
|
216 |
+
"grad_norm": 6.488850116729736,
|
217 |
+
"learning_rate": 1.70926517571885e-05,
|
218 |
+
"loss": 0.6312,
|
219 |
+
"step": 750
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.3184059161873459,
|
223 |
+
"grad_norm": 6.905552387237549,
|
224 |
+
"learning_rate": 1.7663167503423096e-05,
|
225 |
+
"loss": 0.6549,
|
226 |
+
"step": 775
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 0.3286770747740345,
|
230 |
+
"grad_norm": 18.033992767333984,
|
231 |
+
"learning_rate": 1.823368324965769e-05,
|
232 |
+
"loss": 0.5369,
|
233 |
+
"step": 800
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 0.3389482333607231,
|
237 |
+
"grad_norm": 16.312898635864258,
|
238 |
+
"learning_rate": 1.880419899589229e-05,
|
239 |
+
"loss": 0.6098,
|
240 |
+
"step": 825
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"epoch": 0.3492193919474117,
|
244 |
+
"grad_norm": 14.354256629943848,
|
245 |
+
"learning_rate": 1.9374714742126884e-05,
|
246 |
+
"loss": 0.6917,
|
247 |
+
"step": 850
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 0.35949055053410023,
|
251 |
+
"grad_norm": 16.478256225585938,
|
252 |
+
"learning_rate": 1.994523048836148e-05,
|
253 |
+
"loss": 0.6639,
|
254 |
+
"step": 875
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.3697617091207888,
|
258 |
+
"grad_norm": 3.9060003757476807,
|
259 |
+
"learning_rate": 2.0515746234596075e-05,
|
260 |
+
"loss": 0.6152,
|
261 |
+
"step": 900
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 0.3800328677074774,
|
265 |
+
"grad_norm": 22.826074600219727,
|
266 |
+
"learning_rate": 2.108626198083067e-05,
|
267 |
+
"loss": 0.7206,
|
268 |
+
"step": 925
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 0.390304026294166,
|
272 |
+
"grad_norm": 8.423962593078613,
|
273 |
+
"learning_rate": 2.1656777727065268e-05,
|
274 |
+
"loss": 0.6332,
|
275 |
+
"step": 950
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 0.4005751848808546,
|
279 |
+
"grad_norm": 6.902738094329834,
|
280 |
+
"learning_rate": 2.2227293473299863e-05,
|
281 |
+
"loss": 0.7475,
|
282 |
+
"step": 975
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 0.4108463434675431,
|
286 |
+
"grad_norm": 12.147488594055176,
|
287 |
+
"learning_rate": 2.2797809219534462e-05,
|
288 |
+
"loss": 0.6305,
|
289 |
+
"step": 1000
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.4211175020542317,
|
293 |
+
"grad_norm": 9.553220748901367,
|
294 |
+
"learning_rate": 2.3368324965769057e-05,
|
295 |
+
"loss": 0.6422,
|
296 |
+
"step": 1025
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.4313886606409203,
|
300 |
+
"grad_norm": 10.641242980957031,
|
301 |
+
"learning_rate": 2.3938840712003652e-05,
|
302 |
+
"loss": 0.7081,
|
303 |
+
"step": 1050
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.4416598192276089,
|
307 |
+
"grad_norm": 24.202816009521484,
|
308 |
+
"learning_rate": 2.4509356458238247e-05,
|
309 |
+
"loss": 0.7302,
|
310 |
+
"step": 1075
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.4519309778142975,
|
314 |
+
"grad_norm": 28.98261070251465,
|
315 |
+
"learning_rate": 2.5079872204472842e-05,
|
316 |
+
"loss": 0.6791,
|
317 |
+
"step": 1100
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.462202136400986,
|
321 |
+
"grad_norm": 9.403679847717285,
|
322 |
+
"learning_rate": 2.5650387950707437e-05,
|
323 |
+
"loss": 0.6186,
|
324 |
+
"step": 1125
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.4724732949876746,
|
328 |
+
"grad_norm": 29.80078125,
|
329 |
+
"learning_rate": 2.622090369694204e-05,
|
330 |
+
"loss": 0.6075,
|
331 |
+
"step": 1150
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.4827444535743632,
|
335 |
+
"grad_norm": 6.909419059753418,
|
336 |
+
"learning_rate": 2.6791419443176634e-05,
|
337 |
+
"loss": 0.6069,
|
338 |
+
"step": 1175
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.4930156121610518,
|
342 |
+
"grad_norm": 23.68714141845703,
|
343 |
+
"learning_rate": 2.7339114559561846e-05,
|
344 |
+
"loss": 0.6602,
|
345 |
+
"step": 1200
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.5032867707477403,
|
349 |
+
"grad_norm": 11.304485321044922,
|
350 |
+
"learning_rate": 2.790963030579644e-05,
|
351 |
+
"loss": 0.689,
|
352 |
+
"step": 1225
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 0.5135579293344289,
|
356 |
+
"grad_norm": 9.599061965942383,
|
357 |
+
"learning_rate": 2.8480146052031036e-05,
|
358 |
+
"loss": 0.6947,
|
359 |
+
"step": 1250
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 0.5238290879211175,
|
363 |
+
"grad_norm": 17.570533752441406,
|
364 |
+
"learning_rate": 2.905066179826563e-05,
|
365 |
+
"loss": 0.6341,
|
366 |
+
"step": 1275
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 0.5341002465078061,
|
370 |
+
"grad_norm": 23.402406692504883,
|
371 |
+
"learning_rate": 2.962117754450023e-05,
|
372 |
+
"loss": 0.6246,
|
373 |
+
"step": 1300
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 0.5443714050944947,
|
377 |
+
"grad_norm": 14.925309181213379,
|
378 |
+
"learning_rate": 3.0191693290734825e-05,
|
379 |
+
"loss": 0.6841,
|
380 |
+
"step": 1325
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"epoch": 0.5546425636811833,
|
384 |
+
"grad_norm": 29.768821716308594,
|
385 |
+
"learning_rate": 3.076220903696942e-05,
|
386 |
+
"loss": 0.6018,
|
387 |
+
"step": 1350
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 0.5649137222678718,
|
391 |
+
"grad_norm": 36.33218765258789,
|
392 |
+
"learning_rate": 3.133272478320402e-05,
|
393 |
+
"loss": 0.6589,
|
394 |
+
"step": 1375
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"epoch": 0.5751848808545604,
|
398 |
+
"grad_norm": 15.992273330688477,
|
399 |
+
"learning_rate": 3.1903240529438614e-05,
|
400 |
+
"loss": 0.6581,
|
401 |
+
"step": 1400
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"epoch": 0.585456039441249,
|
405 |
+
"grad_norm": 21.353836059570312,
|
406 |
+
"learning_rate": 3.247375627567321e-05,
|
407 |
+
"loss": 0.6803,
|
408 |
+
"step": 1425
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"epoch": 0.5957271980279375,
|
412 |
+
"grad_norm": 19.363903045654297,
|
413 |
+
"learning_rate": 3.3044272021907804e-05,
|
414 |
+
"loss": 0.6636,
|
415 |
+
"step": 1450
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"epoch": 0.6059983566146261,
|
419 |
+
"grad_norm": 9.495320320129395,
|
420 |
+
"learning_rate": 3.3614787768142406e-05,
|
421 |
+
"loss": 0.6562,
|
422 |
+
"step": 1475
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 0.6162695152013147,
|
426 |
+
"grad_norm": 18.960094451904297,
|
427 |
+
"learning_rate": 3.4185303514377e-05,
|
428 |
+
"loss": 0.6603,
|
429 |
+
"step": 1500
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 0.6265406737880033,
|
433 |
+
"grad_norm": 23.797956466674805,
|
434 |
+
"learning_rate": 3.4755819260611596e-05,
|
435 |
+
"loss": 0.6329,
|
436 |
+
"step": 1525
|
437 |
+
},
|
438 |
+
{
|
439 |
+
"epoch": 0.6368118323746919,
|
440 |
+
"grad_norm": 13.95300579071045,
|
441 |
+
"learning_rate": 3.532633500684619e-05,
|
442 |
+
"loss": 0.7285,
|
443 |
+
"step": 1550
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"epoch": 0.6470829909613804,
|
447 |
+
"grad_norm": 21.96845245361328,
|
448 |
+
"learning_rate": 3.5896850753080786e-05,
|
449 |
+
"loss": 0.7046,
|
450 |
+
"step": 1575
|
451 |
+
},
|
452 |
+
{
|
453 |
+
"epoch": 0.657354149548069,
|
454 |
+
"grad_norm": 22.858348846435547,
|
455 |
+
"learning_rate": 3.646736649931538e-05,
|
456 |
+
"loss": 0.6791,
|
457 |
+
"step": 1600
|
458 |
+
},
|
459 |
+
{
|
460 |
+
"epoch": 0.6676253081347576,
|
461 |
+
"grad_norm": 19.458847045898438,
|
462 |
+
"learning_rate": 3.7037882245549977e-05,
|
463 |
+
"loss": 0.7317,
|
464 |
+
"step": 1625
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 0.6778964667214462,
|
468 |
+
"grad_norm": 5.415579319000244,
|
469 |
+
"learning_rate": 3.760839799178458e-05,
|
470 |
+
"loss": 0.6838,
|
471 |
+
"step": 1650
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"epoch": 0.6881676253081348,
|
475 |
+
"grad_norm": 16.664749145507812,
|
476 |
+
"learning_rate": 3.8178913738019174e-05,
|
477 |
+
"loss": 0.5959,
|
478 |
+
"step": 1675
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"epoch": 0.6984387838948234,
|
482 |
+
"grad_norm": 14.080728530883789,
|
483 |
+
"learning_rate": 3.874942948425377e-05,
|
484 |
+
"loss": 0.6315,
|
485 |
+
"step": 1700
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"epoch": 0.7087099424815119,
|
489 |
+
"grad_norm": 10.989696502685547,
|
490 |
+
"learning_rate": 3.9319945230488364e-05,
|
491 |
+
"loss": 0.7356,
|
492 |
+
"step": 1725
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 0.7189811010682005,
|
496 |
+
"grad_norm": 11.903764724731445,
|
497 |
+
"learning_rate": 3.989046097672296e-05,
|
498 |
+
"loss": 0.6935,
|
499 |
+
"step": 1750
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"epoch": 0.729252259654889,
|
503 |
+
"grad_norm": 3.6907122135162354,
|
504 |
+
"learning_rate": 4.0460976722957554e-05,
|
505 |
+
"loss": 0.6909,
|
506 |
+
"step": 1775
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"epoch": 0.7395234182415776,
|
510 |
+
"grad_norm": 10.299661636352539,
|
511 |
+
"learning_rate": 4.103149246919215e-05,
|
512 |
+
"loss": 0.7183,
|
513 |
+
"step": 1800
|
514 |
+
},
|
515 |
+
{
|
516 |
+
"epoch": 0.7497945768282662,
|
517 |
+
"grad_norm": 18.38177490234375,
|
518 |
+
"learning_rate": 4.1602008215426744e-05,
|
519 |
+
"loss": 0.6862,
|
520 |
+
"step": 1825
|
521 |
+
},
|
522 |
+
{
|
523 |
+
"epoch": 0.7600657354149548,
|
524 |
+
"grad_norm": 10.906993865966797,
|
525 |
+
"learning_rate": 4.217252396166134e-05,
|
526 |
+
"loss": 0.7489,
|
527 |
+
"step": 1850
|
528 |
+
},
|
529 |
+
{
|
530 |
+
"epoch": 0.7703368940016434,
|
531 |
+
"grad_norm": 11.314064025878906,
|
532 |
+
"learning_rate": 4.2743039707895935e-05,
|
533 |
+
"loss": 0.7735,
|
534 |
+
"step": 1875
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.780608052588332,
|
538 |
+
"grad_norm": 30.717824935913086,
|
539 |
+
"learning_rate": 4.3313555454130536e-05,
|
540 |
+
"loss": 0.7382,
|
541 |
+
"step": 1900
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"epoch": 0.7908792111750206,
|
545 |
+
"grad_norm": 26.039527893066406,
|
546 |
+
"learning_rate": 4.388407120036513e-05,
|
547 |
+
"loss": 0.741,
|
548 |
+
"step": 1925
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 0.8011503697617092,
|
552 |
+
"grad_norm": 21.928796768188477,
|
553 |
+
"learning_rate": 4.445458694659973e-05,
|
554 |
+
"loss": 0.648,
|
555 |
+
"step": 1950
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 0.8114215283483976,
|
559 |
+
"grad_norm": 15.757039070129395,
|
560 |
+
"learning_rate": 4.502510269283433e-05,
|
561 |
+
"loss": 0.6965,
|
562 |
+
"step": 1975
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 0.8216926869350862,
|
566 |
+
"grad_norm": 4.059453010559082,
|
567 |
+
"learning_rate": 4.5595618439068924e-05,
|
568 |
+
"loss": 0.5749,
|
569 |
+
"step": 2000
|
570 |
+
},
|
571 |
+
{
|
572 |
+
"epoch": 0.8319638455217748,
|
573 |
+
"grad_norm": 25.09820556640625,
|
574 |
+
"learning_rate": 4.616613418530352e-05,
|
575 |
+
"loss": 0.7363,
|
576 |
+
"step": 2025
|
577 |
+
},
|
578 |
+
{
|
579 |
+
"epoch": 0.8422350041084634,
|
580 |
+
"grad_norm": 22.875272750854492,
|
581 |
+
"learning_rate": 4.6736649931538114e-05,
|
582 |
+
"loss": 0.7879,
|
583 |
+
"step": 2050
|
584 |
+
},
|
585 |
+
{
|
586 |
+
"epoch": 0.852506162695152,
|
587 |
+
"grad_norm": 27.716041564941406,
|
588 |
+
"learning_rate": 4.730716567777271e-05,
|
589 |
+
"loss": 0.6949,
|
590 |
+
"step": 2075
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 0.8627773212818406,
|
594 |
+
"grad_norm": 7.766871452331543,
|
595 |
+
"learning_rate": 4.7877681424007304e-05,
|
596 |
+
"loss": 0.7501,
|
597 |
+
"step": 2100
|
598 |
+
},
|
599 |
+
{
|
600 |
+
"epoch": 0.8730484798685292,
|
601 |
+
"grad_norm": 50.67536163330078,
|
602 |
+
"learning_rate": 4.84481971702419e-05,
|
603 |
+
"loss": 0.7346,
|
604 |
+
"step": 2125
|
605 |
+
},
|
606 |
+
{
|
607 |
+
"epoch": 0.8833196384552178,
|
608 |
+
"grad_norm": 30.061460494995117,
|
609 |
+
"learning_rate": 4.9018712916476494e-05,
|
610 |
+
"loss": 0.702,
|
611 |
+
"step": 2150
|
612 |
+
},
|
613 |
+
{
|
614 |
+
"epoch": 0.8935907970419064,
|
615 |
+
"grad_norm": 19.098934173583984,
|
616 |
+
"learning_rate": 4.9589228662711096e-05,
|
617 |
+
"loss": 0.7304,
|
618 |
+
"step": 2175
|
619 |
+
},
|
620 |
+
{
|
621 |
+
"epoch": 0.903861955628595,
|
622 |
+
"grad_norm": 23.1389217376709,
|
623 |
+
"learning_rate": 4.9999984447069524e-05,
|
624 |
+
"loss": 0.7753,
|
625 |
+
"step": 2200
|
626 |
+
},
|
627 |
+
{
|
628 |
+
"epoch": 0.9141331142152835,
|
629 |
+
"grad_norm": 15.105667114257812,
|
630 |
+
"learning_rate": 4.9999674976164344e-05,
|
631 |
+
"loss": 0.7878,
|
632 |
+
"step": 2225
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 0.924404272801972,
|
636 |
+
"grad_norm": 38.34989929199219,
|
637 |
+
"learning_rate": 4.9998968752470534e-05,
|
638 |
+
"loss": 0.7374,
|
639 |
+
"step": 2250
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 0.9346754313886606,
|
643 |
+
"grad_norm": 16.31295394897461,
|
644 |
+
"learning_rate": 4.9997865787196075e-05,
|
645 |
+
"loss": 0.6709,
|
646 |
+
"step": 2275
|
647 |
+
},
|
648 |
+
{
|
649 |
+
"epoch": 0.9449465899753492,
|
650 |
+
"grad_norm": 9.819225311279297,
|
651 |
+
"learning_rate": 4.9996366097845385e-05,
|
652 |
+
"loss": 0.7123,
|
653 |
+
"step": 2300
|
654 |
+
},
|
655 |
+
{
|
656 |
+
"epoch": 0.9552177485620378,
|
657 |
+
"grad_norm": 21.98024559020996,
|
658 |
+
"learning_rate": 4.999446970821902e-05,
|
659 |
+
"loss": 0.7217,
|
660 |
+
"step": 2325
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"epoch": 0.9654889071487264,
|
664 |
+
"grad_norm": 13.155325889587402,
|
665 |
+
"learning_rate": 4.999217664841326e-05,
|
666 |
+
"loss": 0.7577,
|
667 |
+
"step": 2350
|
668 |
+
},
|
669 |
+
{
|
670 |
+
"epoch": 0.975760065735415,
|
671 |
+
"grad_norm": 7.717748641967773,
|
672 |
+
"learning_rate": 4.998948695481972e-05,
|
673 |
+
"loss": 0.7836,
|
674 |
+
"step": 2375
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 0.9860312243221035,
|
678 |
+
"grad_norm": 8.747546195983887,
|
679 |
+
"learning_rate": 4.998640067012468e-05,
|
680 |
+
"loss": 0.8113,
|
681 |
+
"step": 2400
|
682 |
+
},
|
683 |
+
{
|
684 |
+
"epoch": 0.9963023829087921,
|
685 |
+
"grad_norm": 2.1325223445892334,
|
686 |
+
"learning_rate": 4.998291784330846e-05,
|
687 |
+
"loss": 0.7189,
|
688 |
+
"step": 2425
|
689 |
+
},
|
690 |
+
{
|
691 |
+
"epoch": 1.0,
|
692 |
+
"eval_accuracy": 0.6579677386211856,
|
693 |
+
"eval_f1_macro": 0.4525953534948945,
|
694 |
+
"eval_f1_micro": 0.6579677386211856,
|
695 |
+
"eval_f1_weighted": 0.6282433368073111,
|
696 |
+
"eval_loss": 0.9721999168395996,
|
697 |
+
"eval_precision_macro": 0.49011451139838114,
|
698 |
+
"eval_precision_micro": 0.6579677386211856,
|
699 |
+
"eval_precision_weighted": 0.6153964777282442,
|
700 |
+
"eval_recall_macro": 0.44923284497959803,
|
701 |
+
"eval_recall_micro": 0.6579677386211856,
|
702 |
+
"eval_recall_weighted": 0.6579677386211856,
|
703 |
+
"eval_runtime": 20.3328,
|
704 |
+
"eval_samples_per_second": 478.684,
|
705 |
+
"eval_steps_per_second": 15.0,
|
706 |
+
"step": 2434
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 1.0065735414954806,
|
710 |
+
"grad_norm": 13.705938339233398,
|
711 |
+
"learning_rate": 4.997903852964464e-05,
|
712 |
+
"loss": 0.8531,
|
713 |
+
"step": 2450
|
714 |
+
},
|
715 |
+
{
|
716 |
+
"epoch": 1.0168447000821692,
|
717 |
+
"grad_norm": 13.030699729919434,
|
718 |
+
"learning_rate": 4.997476279069914e-05,
|
719 |
+
"loss": 0.6717,
|
720 |
+
"step": 2475
|
721 |
+
},
|
722 |
+
{
|
723 |
+
"epoch": 1.0271158586688578,
|
724 |
+
"grad_norm": 15.674768447875977,
|
725 |
+
"learning_rate": 4.9970090694329316e-05,
|
726 |
+
"loss": 0.6566,
|
727 |
+
"step": 2500
|
728 |
+
},
|
729 |
+
{
|
730 |
+
"epoch": 1.0373870172555464,
|
731 |
+
"grad_norm": 11.915541648864746,
|
732 |
+
"learning_rate": 4.996502231468281e-05,
|
733 |
+
"loss": 0.7618,
|
734 |
+
"step": 2525
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 1.047658175842235,
|
738 |
+
"grad_norm": 7.769870281219482,
|
739 |
+
"learning_rate": 4.995955773219642e-05,
|
740 |
+
"loss": 0.7373,
|
741 |
+
"step": 2550
|
742 |
+
},
|
743 |
+
{
|
744 |
+
"epoch": 1.0579293344289236,
|
745 |
+
"grad_norm": 10.591928482055664,
|
746 |
+
"learning_rate": 4.9953697033594805e-05,
|
747 |
+
"loss": 0.7792,
|
748 |
+
"step": 2575
|
749 |
+
},
|
750 |
+
{
|
751 |
+
"epoch": 1.0682004930156122,
|
752 |
+
"grad_norm": 7.153607368469238,
|
753 |
+
"learning_rate": 4.9947440311889124e-05,
|
754 |
+
"loss": 0.7231,
|
755 |
+
"step": 2600
|
756 |
+
},
|
757 |
+
{
|
758 |
+
"epoch": 1.0784716516023007,
|
759 |
+
"grad_norm": 32.34482955932617,
|
760 |
+
"learning_rate": 4.994078766637553e-05,
|
761 |
+
"loss": 0.7746,
|
762 |
+
"step": 2625
|
763 |
+
},
|
764 |
+
{
|
765 |
+
"epoch": 1.0887428101889893,
|
766 |
+
"grad_norm": 14.119093894958496,
|
767 |
+
"learning_rate": 4.993373920263363e-05,
|
768 |
+
"loss": 0.7627,
|
769 |
+
"step": 2650
|
770 |
+
},
|
771 |
+
{
|
772 |
+
"epoch": 1.099013968775678,
|
773 |
+
"grad_norm": 6.369531631469727,
|
774 |
+
"learning_rate": 4.9926295032524774e-05,
|
775 |
+
"loss": 0.6739,
|
776 |
+
"step": 2675
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 1.1092851273623665,
|
780 |
+
"grad_norm": 32.834388732910156,
|
781 |
+
"learning_rate": 4.991845527419032e-05,
|
782 |
+
"loss": 0.7351,
|
783 |
+
"step": 2700
|
784 |
+
},
|
785 |
+
{
|
786 |
+
"epoch": 1.119556285949055,
|
787 |
+
"grad_norm": 2.3463103771209717,
|
788 |
+
"learning_rate": 4.991022005204972e-05,
|
789 |
+
"loss": 0.699,
|
790 |
+
"step": 2725
|
791 |
+
},
|
792 |
+
{
|
793 |
+
"epoch": 1.1298274445357437,
|
794 |
+
"grad_norm": 19.840364456176758,
|
795 |
+
"learning_rate": 4.990158949679856e-05,
|
796 |
+
"loss": 0.7338,
|
797 |
+
"step": 2750
|
798 |
+
},
|
799 |
+
{
|
800 |
+
"epoch": 1.1400986031224323,
|
801 |
+
"grad_norm": 18.111513137817383,
|
802 |
+
"learning_rate": 4.9892563745406506e-05,
|
803 |
+
"loss": 0.6833,
|
804 |
+
"step": 2775
|
805 |
+
},
|
806 |
+
{
|
807 |
+
"epoch": 1.1503697617091209,
|
808 |
+
"grad_norm": 8.486504554748535,
|
809 |
+
"learning_rate": 4.98831429411151e-05,
|
810 |
+
"loss": 0.8016,
|
811 |
+
"step": 2800
|
812 |
+
},
|
813 |
+
{
|
814 |
+
"epoch": 1.1606409202958095,
|
815 |
+
"grad_norm": 18.1845703125,
|
816 |
+
"learning_rate": 4.9873327233435487e-05,
|
817 |
+
"loss": 0.7478,
|
818 |
+
"step": 2825
|
819 |
+
},
|
820 |
+
{
|
821 |
+
"epoch": 1.170912078882498,
|
822 |
+
"grad_norm": 13.720683097839355,
|
823 |
+
"learning_rate": 4.9863116778146086e-05,
|
824 |
+
"loss": 0.6556,
|
825 |
+
"step": 2850
|
826 |
+
},
|
827 |
+
{
|
828 |
+
"epoch": 1.1811832374691864,
|
829 |
+
"grad_norm": 0.879672646522522,
|
830 |
+
"learning_rate": 4.9852511737290065e-05,
|
831 |
+
"loss": 0.773,
|
832 |
+
"step": 2875
|
833 |
+
},
|
834 |
+
{
|
835 |
+
"epoch": 1.191454396055875,
|
836 |
+
"grad_norm": 5.223727703094482,
|
837 |
+
"learning_rate": 4.9841512279172776e-05,
|
838 |
+
"loss": 0.6251,
|
839 |
+
"step": 2900
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"epoch": 1.2017255546425636,
|
843 |
+
"grad_norm": 26.75752830505371,
|
844 |
+
"learning_rate": 4.983011857835914e-05,
|
845 |
+
"loss": 0.7045,
|
846 |
+
"step": 2925
|
847 |
+
},
|
848 |
+
{
|
849 |
+
"epoch": 1.2119967132292522,
|
850 |
+
"grad_norm": 18.040035247802734,
|
851 |
+
"learning_rate": 4.98183308156708e-05,
|
852 |
+
"loss": 0.6991,
|
853 |
+
"step": 2950
|
854 |
+
},
|
855 |
+
{
|
856 |
+
"epoch": 1.2222678718159408,
|
857 |
+
"grad_norm": 1.8346068859100342,
|
858 |
+
"learning_rate": 4.980614917818329e-05,
|
859 |
+
"loss": 0.648,
|
860 |
+
"step": 2975
|
861 |
+
},
|
862 |
+
{
|
863 |
+
"epoch": 1.2325390304026294,
|
864 |
+
"grad_norm": 13.46601390838623,
|
865 |
+
"learning_rate": 4.9793573859223076e-05,
|
866 |
+
"loss": 0.656,
|
867 |
+
"step": 3000
|
868 |
+
},
|
869 |
+
{
|
870 |
+
"epoch": 1.242810188989318,
|
871 |
+
"grad_norm": 18.629392623901367,
|
872 |
+
"learning_rate": 4.978060505836447e-05,
|
873 |
+
"loss": 0.7385,
|
874 |
+
"step": 3025
|
875 |
+
},
|
876 |
+
{
|
877 |
+
"epoch": 1.2530813475760065,
|
878 |
+
"grad_norm": 8.506892204284668,
|
879 |
+
"learning_rate": 4.976724298142646e-05,
|
880 |
+
"loss": 0.6356,
|
881 |
+
"step": 3050
|
882 |
+
},
|
883 |
+
{
|
884 |
+
"epoch": 1.2633525061626951,
|
885 |
+
"grad_norm": 24.73647689819336,
|
886 |
+
"learning_rate": 4.975348784046946e-05,
|
887 |
+
"loss": 0.7267,
|
888 |
+
"step": 3075
|
889 |
+
},
|
890 |
+
{
|
891 |
+
"epoch": 1.2736236647493837,
|
892 |
+
"grad_norm": 4.445428848266602,
|
893 |
+
"learning_rate": 4.9739339853791925e-05,
|
894 |
+
"loss": 0.7148,
|
895 |
+
"step": 3100
|
896 |
+
},
|
897 |
+
{
|
898 |
+
"epoch": 1.2838948233360723,
|
899 |
+
"grad_norm": 15.720209121704102,
|
900 |
+
"learning_rate": 4.9724799245926895e-05,
|
901 |
+
"loss": 0.7137,
|
902 |
+
"step": 3125
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 1.2941659819227609,
|
906 |
+
"grad_norm": 21.483966827392578,
|
907 |
+
"learning_rate": 4.970986624763845e-05,
|
908 |
+
"loss": 0.6836,
|
909 |
+
"step": 3150
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 1.3044371405094495,
|
913 |
+
"grad_norm": 5.221400737762451,
|
914 |
+
"learning_rate": 4.969516162830212e-05,
|
915 |
+
"loss": 0.7124,
|
916 |
+
"step": 3175
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 1.314708299096138,
|
920 |
+
"grad_norm": 25.75131607055664,
|
921 |
+
"learning_rate": 4.967946023802564e-05,
|
922 |
+
"loss": 0.7195,
|
923 |
+
"step": 3200
|
924 |
+
},
|
925 |
+
{
|
926 |
+
"epoch": 1.3249794576828267,
|
927 |
+
"grad_norm": 5.5664167404174805,
|
928 |
+
"learning_rate": 4.96633671768702e-05,
|
929 |
+
"loss": 0.7364,
|
930 |
+
"step": 3225
|
931 |
+
},
|
932 |
+
{
|
933 |
+
"epoch": 1.3352506162695152,
|
934 |
+
"grad_norm": 12.063735008239746,
|
935 |
+
"learning_rate": 4.9646882700237805e-05,
|
936 |
+
"loss": 0.5976,
|
937 |
+
"step": 3250
|
938 |
+
},
|
939 |
+
{
|
940 |
+
"epoch": 1.3455217748562038,
|
941 |
+
"grad_norm": 30.29339027404785,
|
942 |
+
"learning_rate": 4.9630007069742345e-05,
|
943 |
+
"loss": 0.714,
|
944 |
+
"step": 3275
|
945 |
+
},
|
946 |
+
{
|
947 |
+
"epoch": 1.3557929334428924,
|
948 |
+
"grad_norm": 3.4963226318359375,
|
949 |
+
"learning_rate": 4.961274055320543e-05,
|
950 |
+
"loss": 0.6268,
|
951 |
+
"step": 3300
|
952 |
+
},
|
953 |
+
{
|
954 |
+
"epoch": 1.366064092029581,
|
955 |
+
"grad_norm": 13.199789047241211,
|
956 |
+
"learning_rate": 4.9595083424652164e-05,
|
957 |
+
"loss": 0.7015,
|
958 |
+
"step": 3325
|
959 |
+
},
|
960 |
+
{
|
961 |
+
"epoch": 1.3763352506162696,
|
962 |
+
"grad_norm": 30.219947814941406,
|
963 |
+
"learning_rate": 4.957703596430679e-05,
|
964 |
+
"loss": 0.6893,
|
965 |
+
"step": 3350
|
966 |
+
},
|
967 |
+
{
|
968 |
+
"epoch": 1.3866064092029582,
|
969 |
+
"grad_norm": 16.250619888305664,
|
970 |
+
"learning_rate": 4.955859845858826e-05,
|
971 |
+
"loss": 0.7416,
|
972 |
+
"step": 3375
|
973 |
+
},
|
974 |
+
{
|
975 |
+
"epoch": 1.3968775677896468,
|
976 |
+
"grad_norm": 11.023963928222656,
|
977 |
+
"learning_rate": 4.953977120010563e-05,
|
978 |
+
"loss": 0.5854,
|
979 |
+
"step": 3400
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"epoch": 1.4071487263763354,
|
983 |
+
"grad_norm": 11.984811782836914,
|
984 |
+
"learning_rate": 4.952055448765348e-05,
|
985 |
+
"loss": 0.7531,
|
986 |
+
"step": 3425
|
987 |
+
},
|
988 |
+
{
|
989 |
+
"epoch": 1.417419884963024,
|
990 |
+
"grad_norm": 10.170748710632324,
|
991 |
+
"learning_rate": 4.950094862620715e-05,
|
992 |
+
"loss": 0.684,
|
993 |
+
"step": 3450
|
994 |
+
},
|
995 |
+
{
|
996 |
+
"epoch": 1.4276910435497125,
|
997 |
+
"grad_norm": 26.633150100708008,
|
998 |
+
"learning_rate": 4.9480953926917886e-05,
|
999 |
+
"loss": 0.6428,
|
1000 |
+
"step": 3475
|
1001 |
+
},
|
1002 |
+
{
|
1003 |
+
"epoch": 1.437962202136401,
|
1004 |
+
"grad_norm": 71.91981506347656,
|
1005 |
+
"learning_rate": 4.946057070710793e-05,
|
1006 |
+
"loss": 0.7043,
|
1007 |
+
"step": 3500
|
1008 |
+
},
|
1009 |
+
{
|
1010 |
+
"epoch": 1.4482333607230895,
|
1011 |
+
"grad_norm": 55.96223831176758,
|
1012 |
+
"learning_rate": 4.943979929026546e-05,
|
1013 |
+
"loss": 0.8018,
|
1014 |
+
"step": 3525
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 1.458504519309778,
|
1018 |
+
"grad_norm": 25.171337127685547,
|
1019 |
+
"learning_rate": 4.9418640006039464e-05,
|
1020 |
+
"loss": 0.6975,
|
1021 |
+
"step": 3550
|
1022 |
+
},
|
1023 |
+
{
|
1024 |
+
"epoch": 1.4687756778964667,
|
1025 |
+
"grad_norm": 16.321897506713867,
|
1026 |
+
"learning_rate": 4.9397093190234495e-05,
|
1027 |
+
"loss": 0.7242,
|
1028 |
+
"step": 3575
|
1029 |
+
},
|
1030 |
+
{
|
1031 |
+
"epoch": 1.4790468364831553,
|
1032 |
+
"grad_norm": 14.492510795593262,
|
1033 |
+
"learning_rate": 4.937515918480538e-05,
|
1034 |
+
"loss": 0.6604,
|
1035 |
+
"step": 3600
|
1036 |
+
},
|
1037 |
+
{
|
1038 |
+
"epoch": 1.4893179950698439,
|
1039 |
+
"grad_norm": 3.452258348464966,
|
1040 |
+
"learning_rate": 4.935283833785176e-05,
|
1041 |
+
"loss": 0.7074,
|
1042 |
+
"step": 3625
|
1043 |
+
},
|
1044 |
+
{
|
1045 |
+
"epoch": 1.4995891536565324,
|
1046 |
+
"grad_norm": 50.8736457824707,
|
1047 |
+
"learning_rate": 4.933013100361257e-05,
|
1048 |
+
"loss": 0.8395,
|
1049 |
+
"step": 3650
|
1050 |
+
},
|
1051 |
+
{
|
1052 |
+
"epoch": 1.509860312243221,
|
1053 |
+
"grad_norm": 17.3315372467041,
|
1054 |
+
"learning_rate": 4.930703754246041e-05,
|
1055 |
+
"loss": 0.7815,
|
1056 |
+
"step": 3675
|
1057 |
+
},
|
1058 |
+
{
|
1059 |
+
"epoch": 1.5201314708299096,
|
1060 |
+
"grad_norm": 27.960744857788086,
|
1061 |
+
"learning_rate": 4.928355832089587e-05,
|
1062 |
+
"loss": 0.8035,
|
1063 |
+
"step": 3700
|
1064 |
+
},
|
1065 |
+
{
|
1066 |
+
"epoch": 1.5304026294165982,
|
1067 |
+
"grad_norm": 10.72488784790039,
|
1068 |
+
"learning_rate": 4.9259693711541645e-05,
|
1069 |
+
"loss": 0.7133,
|
1070 |
+
"step": 3725
|
1071 |
+
},
|
1072 |
+
{
|
1073 |
+
"epoch": 1.5406737880032868,
|
1074 |
+
"grad_norm": 32.7005729675293,
|
1075 |
+
"learning_rate": 4.923544409313668e-05,
|
1076 |
+
"loss": 0.6515,
|
1077 |
+
"step": 3750
|
1078 |
+
},
|
1079 |
+
{
|
1080 |
+
"epoch": 1.5509449465899754,
|
1081 |
+
"grad_norm": 23.12322998046875,
|
1082 |
+
"learning_rate": 4.921080985053012e-05,
|
1083 |
+
"loss": 0.762,
|
1084 |
+
"step": 3775
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"epoch": 1.561216105176664,
|
1088 |
+
"grad_norm": 14.074947357177734,
|
1089 |
+
"learning_rate": 4.918579137467523e-05,
|
1090 |
+
"loss": 0.7669,
|
1091 |
+
"step": 3800
|
1092 |
+
},
|
1093 |
+
{
|
1094 |
+
"epoch": 1.5714872637633523,
|
1095 |
+
"grad_norm": 23.544923782348633,
|
1096 |
+
"learning_rate": 4.9160389062623166e-05,
|
1097 |
+
"loss": 0.6909,
|
1098 |
+
"step": 3825
|
1099 |
+
},
|
1100 |
+
{
|
1101 |
+
"epoch": 1.581758422350041,
|
1102 |
+
"grad_norm": 12.691238403320312,
|
1103 |
+
"learning_rate": 4.9134603317516714e-05,
|
1104 |
+
"loss": 0.6231,
|
1105 |
+
"step": 3850
|
1106 |
+
},
|
1107 |
+
{
|
1108 |
+
"epoch": 1.5920295809367295,
|
1109 |
+
"grad_norm": 48.76417541503906,
|
1110 |
+
"learning_rate": 4.910843454858383e-05,
|
1111 |
+
"loss": 0.7732,
|
1112 |
+
"step": 3875
|
1113 |
+
},
|
1114 |
+
{
|
1115 |
+
"epoch": 1.6023007395234181,
|
1116 |
+
"grad_norm": 12.748458862304688,
|
1117 |
+
"learning_rate": 4.908188317113119e-05,
|
1118 |
+
"loss": 0.6368,
|
1119 |
+
"step": 3900
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"epoch": 1.6125718981101067,
|
1123 |
+
"grad_norm": 52.46083068847656,
|
1124 |
+
"learning_rate": 4.905494960653762e-05,
|
1125 |
+
"loss": 0.7586,
|
1126 |
+
"step": 3925
|
1127 |
+
},
|
1128 |
+
{
|
1129 |
+
"epoch": 1.6228430566967953,
|
1130 |
+
"grad_norm": 31.678646087646484,
|
1131 |
+
"learning_rate": 4.902763428224733e-05,
|
1132 |
+
"loss": 0.8258,
|
1133 |
+
"step": 3950
|
1134 |
+
},
|
1135 |
+
{
|
1136 |
+
"epoch": 1.6331142152834839,
|
1137 |
+
"grad_norm": 9.948538780212402,
|
1138 |
+
"learning_rate": 4.8999937631763196e-05,
|
1139 |
+
"loss": 0.7389,
|
1140 |
+
"step": 3975
|
1141 |
+
},
|
1142 |
+
{
|
1143 |
+
"epoch": 1.6433853738701725,
|
1144 |
+
"grad_norm": 19.353836059570312,
|
1145 |
+
"learning_rate": 4.8971860094639874e-05,
|
1146 |
+
"loss": 0.6712,
|
1147 |
+
"step": 4000
|
1148 |
+
},
|
1149 |
+
{
|
1150 |
+
"epoch": 1.653656532456861,
|
1151 |
+
"grad_norm": 12.594712257385254,
|
1152 |
+
"learning_rate": 4.894340211647681e-05,
|
1153 |
+
"loss": 0.7761,
|
1154 |
+
"step": 4025
|
1155 |
+
},
|
1156 |
+
{
|
1157 |
+
"epoch": 1.6639276910435497,
|
1158 |
+
"grad_norm": 25.41390609741211,
|
1159 |
+
"learning_rate": 4.891456414891116e-05,
|
1160 |
+
"loss": 0.7182,
|
1161 |
+
"step": 4050
|
1162 |
+
},
|
1163 |
+
{
|
1164 |
+
"epoch": 1.6741988496302382,
|
1165 |
+
"grad_norm": 19.755008697509766,
|
1166 |
+
"learning_rate": 4.888534664961062e-05,
|
1167 |
+
"loss": 0.6639,
|
1168 |
+
"step": 4075
|
1169 |
+
},
|
1170 |
+
{
|
1171 |
+
"epoch": 1.6844700082169268,
|
1172 |
+
"grad_norm": 20.488435745239258,
|
1173 |
+
"learning_rate": 4.8855750082266216e-05,
|
1174 |
+
"loss": 0.7489,
|
1175 |
+
"step": 4100
|
1176 |
+
},
|
1177 |
+
{
|
1178 |
+
"epoch": 1.6947411668036154,
|
1179 |
+
"grad_norm": 9.432578086853027,
|
1180 |
+
"learning_rate": 4.882577491658486e-05,
|
1181 |
+
"loss": 0.7505,
|
1182 |
+
"step": 4125
|
1183 |
+
},
|
1184 |
+
{
|
1185 |
+
"epoch": 1.705012325390304,
|
1186 |
+
"grad_norm": 24.474281311035156,
|
1187 |
+
"learning_rate": 4.879542162828197e-05,
|
1188 |
+
"loss": 0.6442,
|
1189 |
+
"step": 4150
|
1190 |
+
},
|
1191 |
+
{
|
1192 |
+
"epoch": 1.7152834839769926,
|
1193 |
+
"grad_norm": 22.1693058013916,
|
1194 |
+
"learning_rate": 4.876469069907388e-05,
|
1195 |
+
"loss": 0.8103,
|
1196 |
+
"step": 4175
|
1197 |
+
},
|
1198 |
+
{
|
1199 |
+
"epoch": 1.7255546425636812,
|
1200 |
+
"grad_norm": 33.30137634277344,
|
1201 |
+
"learning_rate": 4.8733582616670195e-05,
|
1202 |
+
"loss": 0.7918,
|
1203 |
+
"step": 4200
|
1204 |
+
},
|
1205 |
+
{
|
1206 |
+
"epoch": 1.7358258011503698,
|
1207 |
+
"grad_norm": 16.692672729492188,
|
1208 |
+
"learning_rate": 4.870209787476608e-05,
|
1209 |
+
"loss": 0.69,
|
1210 |
+
"step": 4225
|
1211 |
+
},
|
1212 |
+
{
|
1213 |
+
"epoch": 1.7460969597370584,
|
1214 |
+
"grad_norm": 9.02397632598877,
|
1215 |
+
"learning_rate": 4.86702369730344e-05,
|
1216 |
+
"loss": 0.6181,
|
1217 |
+
"step": 4250
|
1218 |
+
},
|
1219 |
+
{
|
1220 |
+
"epoch": 1.756368118323747,
|
1221 |
+
"grad_norm": 11.770980834960938,
|
1222 |
+
"learning_rate": 4.863800041711779e-05,
|
1223 |
+
"loss": 0.7505,
|
1224 |
+
"step": 4275
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 1.7666392769104355,
|
1228 |
+
"grad_norm": 30.661998748779297,
|
1229 |
+
"learning_rate": 4.860538871862064e-05,
|
1230 |
+
"loss": 0.6744,
|
1231 |
+
"step": 4300
|
1232 |
+
},
|
1233 |
+
{
|
1234 |
+
"epoch": 1.7769104354971241,
|
1235 |
+
"grad_norm": 14.791523933410645,
|
1236 |
+
"learning_rate": 4.8572402395100966e-05,
|
1237 |
+
"loss": 0.7865,
|
1238 |
+
"step": 4325
|
1239 |
+
},
|
1240 |
+
{
|
1241 |
+
"epoch": 1.7871815940838127,
|
1242 |
+
"grad_norm": 11.105860710144043,
|
1243 |
+
"learning_rate": 4.8539041970062216e-05,
|
1244 |
+
"loss": 0.7299,
|
1245 |
+
"step": 4350
|
1246 |
+
},
|
1247 |
+
{
|
1248 |
+
"epoch": 1.7974527526705013,
|
1249 |
+
"grad_norm": 1.4959219694137573,
|
1250 |
+
"learning_rate": 4.8505307972944934e-05,
|
1251 |
+
"loss": 0.6649,
|
1252 |
+
"step": 4375
|
1253 |
+
},
|
1254 |
+
{
|
1255 |
+
"epoch": 1.80772391125719,
|
1256 |
+
"grad_norm": 38.041778564453125,
|
1257 |
+
"learning_rate": 4.8471200939118376e-05,
|
1258 |
+
"loss": 0.853,
|
1259 |
+
"step": 4400
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"epoch": 1.8179950698438785,
|
1263 |
+
"grad_norm": 13.44230842590332,
|
1264 |
+
"learning_rate": 4.843672140987202e-05,
|
1265 |
+
"loss": 0.6944,
|
1266 |
+
"step": 4425
|
1267 |
+
},
|
1268 |
+
{
|
1269 |
+
"epoch": 1.828266228430567,
|
1270 |
+
"grad_norm": 3.37754487991333,
|
1271 |
+
"learning_rate": 4.8401869932406946e-05,
|
1272 |
+
"loss": 0.6653,
|
1273 |
+
"step": 4450
|
1274 |
+
},
|
1275 |
+
{
|
1276 |
+
"epoch": 1.8385373870172557,
|
1277 |
+
"grad_norm": 10.962355613708496,
|
1278 |
+
"learning_rate": 4.8366647059827185e-05,
|
1279 |
+
"loss": 0.6699,
|
1280 |
+
"step": 4475
|
1281 |
+
},
|
1282 |
+
{
|
1283 |
+
"epoch": 1.8488085456039443,
|
1284 |
+
"grad_norm": 23.833568572998047,
|
1285 |
+
"learning_rate": 4.8331053351130915e-05,
|
1286 |
+
"loss": 0.7714,
|
1287 |
+
"step": 4500
|
1288 |
+
},
|
1289 |
+
{
|
1290 |
+
"epoch": 1.8590797041906328,
|
1291 |
+
"grad_norm": 20.1926212310791,
|
1292 |
+
"learning_rate": 4.829508937120162e-05,
|
1293 |
+
"loss": 0.6746,
|
1294 |
+
"step": 4525
|
1295 |
+
},
|
1296 |
+
{
|
1297 |
+
"epoch": 1.8693508627773214,
|
1298 |
+
"grad_norm": 4.717026710510254,
|
1299 |
+
"learning_rate": 4.825875569079907e-05,
|
1300 |
+
"loss": 0.764,
|
1301 |
+
"step": 4550
|
1302 |
+
},
|
1303 |
+
{
|
1304 |
+
"epoch": 1.87962202136401,
|
1305 |
+
"grad_norm": 51.555458068847656,
|
1306 |
+
"learning_rate": 4.822205288655035e-05,
|
1307 |
+
"loss": 0.5943,
|
1308 |
+
"step": 4575
|
1309 |
+
},
|
1310 |
+
{
|
1311 |
+
"epoch": 1.8898931799506984,
|
1312 |
+
"grad_norm": 17.627649307250977,
|
1313 |
+
"learning_rate": 4.818498154094062e-05,
|
1314 |
+
"loss": 0.6794,
|
1315 |
+
"step": 4600
|
1316 |
+
},
|
1317 |
+
{
|
1318 |
+
"epoch": 1.900164338537387,
|
1319 |
+
"grad_norm": 15.7213716506958,
|
1320 |
+
"learning_rate": 4.814754224230391e-05,
|
1321 |
+
"loss": 0.6973,
|
1322 |
+
"step": 4625
|
1323 |
+
},
|
1324 |
+
{
|
1325 |
+
"epoch": 1.9104354971240756,
|
1326 |
+
"grad_norm": 9.14035415649414,
|
1327 |
+
"learning_rate": 4.81097355848138e-05,
|
1328 |
+
"loss": 0.9402,
|
1329 |
+
"step": 4650
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"epoch": 1.9207066557107642,
|
1333 |
+
"grad_norm": 15.820075988769531,
|
1334 |
+
"learning_rate": 4.807156216847396e-05,
|
1335 |
+
"loss": 0.8003,
|
1336 |
+
"step": 4675
|
1337 |
+
},
|
1338 |
+
{
|
1339 |
+
"epoch": 1.9309778142974527,
|
1340 |
+
"grad_norm": 1.9751650094985962,
|
1341 |
+
"learning_rate": 4.803302259910863e-05,
      "loss": 0.746,
      "step": 4700
    },
    { "epoch": 1.9412489728841413, "grad_norm": 34.23078155517578, "learning_rate": 4.799411748835305e-05, "loss": 0.774, "step": 4725 },
    { "epoch": 1.95152013147083, "grad_norm": 33.61397933959961, "learning_rate": 4.7954847453643686e-05, "loss": 0.8032, "step": 4750 },
    { "epoch": 1.9617912900575185, "grad_norm": 22.3249454498291, "learning_rate": 4.7915213118208456e-05, "loss": 0.7198, "step": 4775 },
    { "epoch": 1.972062448644207, "grad_norm": 5.324288368225098, "learning_rate": 4.787521511105689e-05, "loss": 0.7335, "step": 4800 },
    { "epoch": 1.9823336072308957, "grad_norm": 7.5206990242004395, "learning_rate": 4.783485406697005e-05, "loss": 0.7861, "step": 4825 },
    { "epoch": 1.9926047658175843, "grad_norm": 18.279809951782227, "learning_rate": 4.7794130626490544e-05, "loss": 0.659, "step": 4850 },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.661871981917189,
      "eval_f1_macro": 0.458297977909026,
      "eval_f1_micro": 0.661871981917189,
      "eval_f1_weighted": 0.6408854719266703,
      "eval_loss": 0.9106447696685791,
      "eval_precision_macro": 0.44316873127833,
      "eval_precision_micro": 0.661871981917189,
      "eval_precision_weighted": 0.6229543768525556,
      "eval_recall_macro": 0.4775160837702286,
      "eval_recall_micro": 0.661871981917189,
      "eval_recall_weighted": 0.661871981917189,
      "eval_runtime": 20.2196,
      "eval_samples_per_second": 481.365,
      "eval_steps_per_second": 15.084,
      "step": 4868
    },
    { "epoch": 2.0028759244042726, "grad_norm": 27.522146224975586, "learning_rate": 4.775304543591232e-05, "loss": 0.7247, "step": 4875 },
    { "epoch": 2.0131470829909612, "grad_norm": 2.502640962600708, "learning_rate": 4.7711599147270416e-05, "loss": 0.7228, "step": 4900 },
    { "epoch": 2.02341824157765, "grad_norm": 10.220170974731445, "learning_rate": 4.766979241833059e-05, "loss": 0.6397, "step": 4925 },
    { "epoch": 2.0336894001643384, "grad_norm": 23.93318748474121, "learning_rate": 4.7627625912578945e-05, "loss": 0.6842, "step": 4950 },
    { "epoch": 2.043960558751027, "grad_norm": 10.872408866882324, "learning_rate": 4.758510029921132e-05, "loss": 0.7494, "step": 4975 },
    { "epoch": 2.0542317173377156, "grad_norm": 29.06859588623047, "learning_rate": 4.754221625312271e-05, "loss": 0.7152, "step": 5000 },
    { "epoch": 2.064502875924404, "grad_norm": 13.37592887878418, "learning_rate": 4.749897445489656e-05, "loss": 0.6585, "step": 5025 },
    { "epoch": 2.0747740345110928, "grad_norm": 7.18634033203125, "learning_rate": 4.745537559079396e-05, "loss": 0.6494, "step": 5050 },
    { "epoch": 2.0850451930977814, "grad_norm": 5.377128601074219, "learning_rate": 4.741142035274274e-05, "loss": 0.7026, "step": 5075 },
    { "epoch": 2.09531635168447, "grad_norm": 5.074624538421631, "learning_rate": 4.736710943832649e-05, "loss": 0.732, "step": 5100 },
    { "epoch": 2.1055875102711585, "grad_norm": 3.903517246246338, "learning_rate": 4.7322443550773535e-05, "loss": 0.6671, "step": 5125 },
    { "epoch": 2.115858668857847, "grad_norm": 21.475658416748047, "learning_rate": 4.727742339894567e-05, "loss": 0.7383, "step": 5150 },
    { "epoch": 2.1261298274445357, "grad_norm": 17.559850692749023, "learning_rate": 4.723204969732704e-05, "loss": 0.7126, "step": 5175 },
    { "epoch": 2.1364009860312243, "grad_norm": 3.733065366744995, "learning_rate": 4.7186323166012714e-05, "loss": 0.7779, "step": 5200 },
    { "epoch": 2.146672144617913, "grad_norm": 50.46138381958008, "learning_rate": 4.714024453069727e-05, "loss": 0.6233, "step": 5225 },
    { "epoch": 2.1569433032046015, "grad_norm": 56.56818771362305, "learning_rate": 4.7093814522663304e-05, "loss": 0.8527, "step": 5250 },
    { "epoch": 2.16721446179129, "grad_norm": 3.0601882934570312, "learning_rate": 4.70470338787698e-05, "loss": 0.7016, "step": 5275 },
    { "epoch": 2.1774856203779787, "grad_norm": 13.10061264038086, "learning_rate": 4.700179527152305e-05, "loss": 0.7397, "step": 5300 },
    { "epoch": 2.1877567789646672, "grad_norm": 39.44890594482422, "learning_rate": 4.695432954012213e-05, "loss": 0.6456, "step": 5325 },
    { "epoch": 2.198027937551356, "grad_norm": 22.93960952758789, "learning_rate": 4.690651538653268e-05, "loss": 0.7832, "step": 5350 },
    { "epoch": 2.2082990961380444, "grad_norm": 22.193588256835938, "learning_rate": 4.685835356958051e-05, "loss": 0.6787, "step": 5375 },
    { "epoch": 2.218570254724733, "grad_norm": 31.868240356445312, "learning_rate": 4.680984485360899e-05, "loss": 0.6281, "step": 5400 },
    { "epoch": 2.2288414133114216, "grad_norm": 25.054807662963867, "learning_rate": 4.676099000846684e-05, "loss": 0.817, "step": 5425 },
    { "epoch": 2.23911257189811, "grad_norm": 10.673225402832031, "learning_rate": 4.671178980949599e-05, "loss": 0.7448, "step": 5450 },
    { "epoch": 2.249383730484799, "grad_norm": 4.617043972015381, "learning_rate": 4.666224503751923e-05, "loss": 0.7868, "step": 5475 },
    { "epoch": 2.2596548890714874, "grad_norm": 16.736486434936523, "learning_rate": 4.661235647882785e-05, "loss": 0.7392, "step": 5500 },
    { "epoch": 2.269926047658176, "grad_norm": 143.8787078857422, "learning_rate": 4.6562124925169126e-05, "loss": 0.7608, "step": 5525 },
    { "epoch": 2.2801972062448645, "grad_norm": 12.933761596679688, "learning_rate": 4.651155117373378e-05, "loss": 0.7637, "step": 5550 },
    { "epoch": 2.290468364831553, "grad_norm": 19.505279541015625, "learning_rate": 4.646063602714334e-05, "loss": 0.72, "step": 5575 },
    { "epoch": 2.3007395234182417, "grad_norm": 18.15628433227539, "learning_rate": 4.6409380293437355e-05, "loss": 0.7434, "step": 5600 },
    { "epoch": 2.3110106820049303, "grad_norm": 184.67726135253906, "learning_rate": 4.635778478606061e-05, "loss": 0.6824, "step": 5625 },
    { "epoch": 2.321281840591619, "grad_norm": 15.403681755065918, "learning_rate": 4.6305850323850216e-05, "loss": 0.7953, "step": 5650 },
    { "epoch": 2.3315529991783075, "grad_norm": 22.72369956970215, "learning_rate": 4.625357773102259e-05, "loss": 0.7348, "step": 5675 },
    { "epoch": 2.341824157764996, "grad_norm": 9.090083122253418, "learning_rate": 4.62009678371604e-05, "loss": 0.6719, "step": 5700 },
    { "epoch": 2.3520953163516847, "grad_norm": 13.648232460021973, "learning_rate": 4.6148021477199386e-05, "loss": 0.7489, "step": 5725 },
    { "epoch": 2.362366474938373, "grad_norm": 24.75493621826172, "learning_rate": 4.60947394914151e-05, "loss": 0.7936, "step": 5750 },
    { "epoch": 2.372637633525062, "grad_norm": 15.41668701171875, "learning_rate": 4.604112272540961e-05, "loss": 0.7393, "step": 5775 },
    { "epoch": 2.38290879211175, "grad_norm": 20.62226104736328, "learning_rate": 4.5987172030098024e-05, "loss": 0.6496, "step": 5800 },
    { "epoch": 2.393179950698439, "grad_norm": 29.890432357788086, "learning_rate": 4.5932888261695026e-05, "loss": 0.7946, "step": 5825 },
    { "epoch": 2.403451109285127, "grad_norm": 45.65409851074219, "learning_rate": 4.587827228170129e-05, "loss": 0.668, "step": 5850 },
    { "epoch": 2.4137222678718158, "grad_norm": 23.496747970581055, "learning_rate": 4.5823324956889764e-05, "loss": 0.7671, "step": 5875 },
    { "epoch": 2.4239934264585044, "grad_norm": 107.06421661376953, "learning_rate": 4.576804715929196e-05, "loss": 0.7784, "step": 5900 },
    { "epoch": 2.434264585045193, "grad_norm": 24.115577697753906, "learning_rate": 4.571243976618411e-05, "loss": 0.6693, "step": 5925 },
    { "epoch": 2.4445357436318815, "grad_norm": 7.726546287536621, "learning_rate": 4.56565036600732e-05, "loss": 0.7062, "step": 5950 },
    { "epoch": 2.45480690221857, "grad_norm": 8.461091041564941, "learning_rate": 4.5600239728683035e-05, "loss": 0.7455, "step": 5975 },
    { "epoch": 2.4650780608052587, "grad_norm": 19.741405487060547, "learning_rate": 4.554364886494008e-05, "loss": 0.7246, "step": 6000 },
    { "epoch": 2.4753492193919473, "grad_norm": 15.383966445922852, "learning_rate": 4.548673196695934e-05, "loss": 0.6792, "step": 6025 },
    { "epoch": 2.485620377978636, "grad_norm": 31.002105712890625, "learning_rate": 4.5429489938030055e-05, "loss": 0.6649, "step": 6050 },
    { "epoch": 2.4958915365653245, "grad_norm": 20.87677001953125, "learning_rate": 4.5371923686601446e-05, "loss": 0.8137, "step": 6075 },
    { "epoch": 2.506162695152013, "grad_norm": 6.823126792907715, "learning_rate": 4.531403412626821e-05, "loss": 0.7345, "step": 6100 },
    { "epoch": 2.5164338537387017, "grad_norm": 10.97050952911377, "learning_rate": 4.525582217575607e-05, "loss": 0.662, "step": 6125 },
    { "epoch": 2.5267050123253902, "grad_norm": 23.480966567993164, "learning_rate": 4.5197288758907194e-05, "loss": 0.7744, "step": 6150 },
    { "epoch": 2.536976170912079, "grad_norm": 71.35833740234375, "learning_rate": 4.5138434804665505e-05, "loss": 0.6405, "step": 6175 },
    { "epoch": 2.5472473294987674, "grad_norm": 2.59698486328125, "learning_rate": 4.5079261247061974e-05, "loss": 0.7056, "step": 6200 },
    { "epoch": 2.557518488085456, "grad_norm": 13.829898834228516, "learning_rate": 4.501976902519975e-05, "loss": 0.7639, "step": 6225 },
    { "epoch": 2.5677896466721446, "grad_norm": 32.521240234375, "learning_rate": 4.4959959083239335e-05, "loss": 0.711, "step": 6250 },
    { "epoch": 2.578060805258833, "grad_norm": 9.438847541809082, "learning_rate": 4.489983237038349e-05, "loss": 0.7104, "step": 6275 },
    { "epoch": 2.5883319638455218, "grad_norm": 9.817651748657227, "learning_rate": 4.483938984086228e-05, "loss": 0.7139, "step": 6300 },
    { "epoch": 2.5986031224322104, "grad_norm": 8.208524703979492, "learning_rate": 4.4778632453917854e-05, "loss": 0.7199, "step": 6325 },
    { "epoch": 2.608874281018899, "grad_norm": 20.00709342956543, "learning_rate": 4.471756117378926e-05, "loss": 0.7504, "step": 6350 },
    { "epoch": 2.6191454396055875, "grad_norm": 8.180582046508789, "learning_rate": 4.4656176969697123e-05, "loss": 0.7396, "step": 6375 },
    { "epoch": 2.629416598192276, "grad_norm": 21.006549835205078, "learning_rate": 4.4594480815828274e-05, "loss": 0.767, "step": 6400 },
    { "epoch": 2.6396877567789647, "grad_norm": 9.309197425842285, "learning_rate": 4.45324736913203e-05, "loss": 0.7447, "step": 6425 },
    { "epoch": 2.6499589153656533, "grad_norm": 47.43417739868164, "learning_rate": 4.447015658024595e-05, "loss": 0.7457, "step": 6450 },
    { "epoch": 2.660230073952342, "grad_norm": 14.902249336242676, "learning_rate": 4.4407530471597614e-05, "loss": 0.652, "step": 6475 },
    { "epoch": 2.6705012325390305, "grad_norm": 16.622949600219727, "learning_rate": 4.434459635927152e-05, "loss": 0.6759, "step": 6500 },
    { "epoch": 2.680772391125719, "grad_norm": 45.0763053894043, "learning_rate": 4.428135524205206e-05, "loss": 0.6605, "step": 6525 },
    { "epoch": 2.6910435497124077, "grad_norm": 160.1131134033203, "learning_rate": 4.4217808123595846e-05, "loss": 0.7286, "step": 6550 },
    { "epoch": 2.7013147082990963, "grad_norm": 20.303207397460938, "learning_rate": 4.415395601241586e-05, "loss": 0.7143, "step": 6575 },
    { "epoch": 2.711585866885785, "grad_norm": 8.981250762939453, "learning_rate": 4.408979992186539e-05, "loss": 0.7694, "step": 6600 },
    { "epoch": 2.7218570254724734, "grad_norm": 8.836969375610352, "learning_rate": 4.402534087012201e-05, "loss": 0.7134, "step": 6625 },
    { "epoch": 2.732128184059162, "grad_norm": 1.7732627391815186, "learning_rate": 4.3960579880171346e-05, "loss": 0.6537, "step": 6650 },
    { "epoch": 2.7423993426458506, "grad_norm": 22.986120223999023, "learning_rate": 4.389551797979089e-05, "loss": 0.7382, "step": 6675 },
    { "epoch": 2.752670501232539, "grad_norm": 7.212994575500488, "learning_rate": 4.383015620153369e-05, "loss": 0.6945, "step": 6700 },
    { "epoch": 2.7629416598192273, "grad_norm": 9.178274154663086, "learning_rate": 4.376449558271194e-05, "loss": 0.8691, "step": 6725 },
    { "epoch": 2.7732128184059164, "grad_norm": 22.552732467651367, "learning_rate": 4.369853716538053e-05, "loss": 0.7417, "step": 6750 },
    { "epoch": 2.7834839769926045, "grad_norm": 18.054012298583984, "learning_rate": 4.3632281996320526e-05, "loss": 0.7231, "step": 6775 },
    { "epoch": 2.7937551355792936, "grad_norm": 20.407691955566406, "learning_rate": 4.3565731127022504e-05, "loss": 0.6774, "step": 6800 },
    { "epoch": 2.8040262941659817, "grad_norm": 12.27515983581543, "learning_rate": 4.349888561366991e-05, "loss": 0.7521, "step": 6825 },
    { "epoch": 2.8142974527526707, "grad_norm": 32.21310806274414, "learning_rate": 4.343174651712232e-05, "loss": 0.6581, "step": 6850 },
    { "epoch": 2.824568611339359, "grad_norm": 19.447738647460938, "learning_rate": 4.33643149028985e-05, "loss": 0.73, "step": 6875 },
    { "epoch": 2.834839769926048, "grad_norm": 6.433927536010742, "learning_rate": 4.329659184115963e-05, "loss": 0.8247, "step": 6900 },
    { "epoch": 2.845110928512736, "grad_norm": 21.21660804748535, "learning_rate": 4.322857840669222e-05, "loss": 0.7508, "step": 6925 },
    { "epoch": 2.855382087099425, "grad_norm": 4.219921588897705, "learning_rate": 4.3160275678891085e-05, "loss": 0.7135, "step": 6950 },
    { "epoch": 2.8656532456861132, "grad_norm": 12.678546905517578, "learning_rate": 4.309168474174224e-05, "loss": 0.7757, "step": 6975 },
    { "epoch": 2.875924404272802, "grad_norm": 10.19421100616455, "learning_rate": 4.3022806683805653e-05, "loss": 0.6503, "step": 7000 },
    { "epoch": 2.8861955628594904, "grad_norm": 6.024997234344482, "learning_rate": 4.2953642598198e-05, "loss": 0.7013, "step": 7025 },
    { "epoch": 2.896466721446179, "grad_norm": 45.83235549926758, "learning_rate": 4.288419358257531e-05, "loss": 0.7507, "step": 7050 },
    { "epoch": 2.9067378800328676, "grad_norm": 13.147794723510742, "learning_rate": 4.281446073911553e-05, "loss": 0.6307, "step": 7075 },
    { "epoch": 2.917009038619556, "grad_norm": 26.889511108398438, "learning_rate": 4.2744445174501045e-05, "loss": 0.667, "step": 7100 },
    { "epoch": 2.9272801972062448, "grad_norm": 11.249592781066895, "learning_rate": 4.2674147999901144e-05, "loss": 0.7892, "step": 7125 },
    { "epoch": 2.9375513557929334, "grad_norm": 8.001336097717285, "learning_rate": 4.260357033095431e-05, "loss": 0.6859, "step": 7150 },
    { "epoch": 2.947822514379622, "grad_norm": 21.009946823120117, "learning_rate": 4.25327132877506e-05, "loss": 0.7299, "step": 7175 },
    { "epoch": 2.9580936729663105, "grad_norm": 14.191951751708984, "learning_rate": 4.2461577994813814e-05, "loss": 0.6725, "step": 7200 },
    { "epoch": 2.968364831552999, "grad_norm": 6.6921916007995605, "learning_rate": 4.2390165581083654e-05, "loss": 0.6824, "step": 7225 },
    { "epoch": 2.9786359901396877, "grad_norm": 39.17061233520508, "learning_rate": 4.231847717989782e-05, "loss": 0.7409, "step": 7250 },
    { "epoch": 2.9889071487263763, "grad_norm": 31.245113372802734, "learning_rate": 4.224651392897404e-05, "loss": 0.7009, "step": 7275 },
    { "epoch": 2.999178307313065, "grad_norm": 17.3699893951416, "learning_rate": 4.217427697039198e-05, "loss": 0.7931, "step": 7300 },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6470769546902291,
      "eval_f1_macro": 0.48909871800127924,
      "eval_f1_micro": 0.6470769546902291,
      "eval_f1_weighted": 0.6468481218149891,
      "eval_loss": 0.8904216289520264,
      "eval_precision_macro": 0.5045882669843967,
      "eval_precision_micro": 0.6470769546902291,
      "eval_precision_weighted": 0.6554944836574212,
      "eval_recall_macro": 0.49646946445535234,
      "eval_recall_micro": 0.6470769546902291,
      "eval_recall_weighted": 0.6470769546902291,
      "eval_runtime": 20.0256,
      "eval_samples_per_second": 486.028,
      "eval_steps_per_second": 15.231,
      "step": 7302
    }
  ],
  "logging_steps": 25,
  "max_steps": 21906,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5370309738954752e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
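
The block above closes out checkpoint-7302/trainer_state.json: per-25-step training logs plus one evaluation record per epoch, with early stopping (patience 5, threshold 0.01) watching the eval metric. As a minimal sketch, assuming the checkpoint folder has been downloaded locally and relying on the standard "log_history" key of Trainer state files, the per-epoch eval rows can be pulled out like this:

import json

# Hypothetical local path: adjust to wherever checkpoint-7302 was downloaded.
with open("checkpoint-7302/trainer_state.json") as f:
    state = json.load(f)

# Entries containing "eval_loss" are the per-epoch evaluation records;
# the rest are the per-25-step training logs shown above.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["epoch"], entry["eval_loss"], entry["eval_f1_macro"])

For this checkpoint that includes the epoch 2.0 and 3.0 rows above (eval_loss 0.9106 and 0.8904).
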
checkpoint-7302/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b3846c407f453ea89f497122124bfe218afe31e6685d5602147a195a385fd0f
size 5368
config.json
ADDED
@@ -0,0 +1,49 @@
{
  "_name_or_path": "ivanovsdesign/huawei-data-classification",
  "_num_labels": 5,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "1",
    "1": "2",
    "2": "3",
    "3": "4",
    "4": "5"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "1": 0,
    "2": 1,
    "3": 2,
    "4": 3,
    "5": 4
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.46.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 119547
}
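
Since id2label in this config maps class indices back to the rating labels "1" through "5", inference is a plain sequence-classification call. A minimal sketch, assuming the model is loaded straight from this repository (the repo id below is inferred from the project name and username, so treat it as a placeholder):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

# Any review text works here; truncation mirrors the 256-token cap used in training.
inputs = tokenizer("example review text", return_tensors="pt", truncation=True, max_length=256)
with torch.no_grad():
    logits = model(**inputs).logits
pred_idx = logits.argmax(dim=-1).item()
print(model.config.id2label[pred_idx])  # one of "1".."5"
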
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1482e954dc7bbef048c49c06bdc65118b9138ec4ca7e91f7933627bf7f2df339
size 711452684
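
Note that the three lines above are a Git LFS pointer (hash and size, roughly 711 MB), not the weights themselves. One way to fetch the actual file, sketched with huggingface_hub and the same assumed repo id as above:

from huggingface_hub import hf_hub_download

# Assumed repo id; the call resolves the LFS pointer and downloads the real weights file.
weights_path = hf_hub_download(
    repo_id="ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up",
    filename="model.safetensors",
)
print(weights_path)
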
runs/Nov09_23-13-04_6604152ce143/events.out.tfevents.1731193985.6604152ce143.674.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b3c1d8a27a5a46e496f1d77ba07a257ae833f32c1b63e295e178a2f7102f67ee
+size 177317
runs/Nov09_23-13-04_6604152ce143/events.out.tfevents.1731197419.6604152ce143.674.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e92665de491602d132478b126e2841223ced61698002bbafebadd32a25da6538
size 936
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,64 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": false,
  "mask_token": "[MASK]",
  "max_length": 256,
  "model_max_length": 1000000000000000019884624838656,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
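
The tokenizer above is a cased BertTokenizer with a 256-token cap and right-side padding and truncation. A small sketch of reproducing that preprocessing for a batch (same assumed repo id as before; max_length is passed explicitly because model_max_length is effectively unset here):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ivanovsdesign/rubert-base-ru-huawei-sentiment-fine-up")  # assumed repo id
batch = tokenizer(
    ["first review", "second, much longer review ..."],
    padding=True,        # pads on the right, per padding_side above
    truncation=True,     # truncates on the right, per truncation_side above
    max_length=256,      # matches the max_length recorded above
    return_tensors="pt",
)
print(batch["input_ids"].shape)
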
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b3846c407f453ea89f497122124bfe218afe31e6685d5602147a195a385fd0f
size 5368
training_params.json
ADDED
@@ -0,0 +1,30 @@
{
  "data_path": "rubert-base-ru-huawei-sentiment-fine-up/autotrain-data",
  "model": "ivanovsdesign/huawei-data-classification",
  "lr": 5e-05,
  "epochs": 9,
  "max_seq_length": 256,
  "batch_size": 8,
  "warmup_ratio": 0.1,
  "gradient_accumulation": 1,
  "optimizer": "adamw_torch",
  "scheduler": "cosine_with_restarts",
  "weight_decay": 0.0,
  "max_grad_norm": 1.0,
  "seed": 42,
  "train_split": "train",
  "valid_split": "validation",
  "text_column": "autotrain_text",
  "target_column": "autotrain_label",
  "logging_steps": -1,
  "project_name": "rubert-base-ru-huawei-sentiment-fine-up",
  "auto_find_batch_size": false,
  "mixed_precision": "fp16",
  "save_total_limit": 1,
  "push_to_hub": true,
  "eval_strategy": "epoch",
  "username": "ivanovsdesign",
  "log": "tensorboard",
  "early_stopping_patience": 5,
  "early_stopping_threshold": 0.01
}
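
These are the AutoTrain parameters behind the run: lr 5e-05 on a cosine-with-restarts schedule, warmup_ratio 0.1, batch size 8 for 9 epochs with fp16 and per-epoch evaluation. Combined with the max_steps value of 21906 recorded in trainer_state.json, the warmup ratio works out to roughly 2190 optimizer steps of learning-rate warmup; a tiny sketch of that arithmetic, with values copied from the files above:

# Warmup length implied by the run configuration.
max_steps = 21906        # total optimizer steps, from trainer_state.json
warmup_ratio = 0.1       # from training_params.json
warmup_steps = int(max_steps * warmup_ratio)
print(warmup_steps)      # 2190
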
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff