ivanovsdesign
/

sbert-ru-huawei-gradient-accumulation

Text Classification

Trained with AutoTrain

Model card Files Files and versions Metrics Training metrics Community

sbert-ru-huawei-gradient-accumulation / checkpoint-304 /trainer_state.json

ivanovsdesign's picture

Upload folder using huggingface_hub

7f72002 verified 12 days ago

history blame contribute delete

3.79 kB

	{
	"best_metric": 0.8353803157806396,
	"best_model_checkpoint": "sbert-ru-huawei-gradient-accumulation/checkpoint-304",
	"epoch": 0.9991783073130649,
	"eval_steps": 500,
	"global_step": 304,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.08216926869350863,
	"grad_norm": 20.81386947631836,
	"learning_rate": 1.25e-05,
	"loss": 0.7474,
	"step": 25
	},
	{
	"epoch": 0.16433853738701726,
	"grad_norm": 24.849594116210938,
	"learning_rate": 2.554347826086957e-05,
	"loss": 0.7068,
	"step": 50
	},
	{
	"epoch": 0.2465078060805259,
	"grad_norm": 14.201815605163574,
	"learning_rate": 3.91304347826087e-05,
	"loss": 0.6739,
	"step": 75
	},
	{
	"epoch": 0.3286770747740345,
	"grad_norm": 19.431638717651367,
	"learning_rate": 4.9995413210794864e-05,
	"loss": 0.6802,
	"step": 100
	},
	{
	"epoch": 0.4108463434675431,
	"grad_norm": 17.281723022460938,
	"learning_rate": 4.9835052243991874e-05,
	"loss": 0.6518,
	"step": 125
	},
	{
	"epoch": 0.4930156121610518,
	"grad_norm": 21.763912200927734,
	"learning_rate": 4.944703213375648e-05,
	"loss": 0.6609,
	"step": 150
	},
	{
	"epoch": 0.5751848808545604,
	"grad_norm": 35.13481903076172,
	"learning_rate": 4.8834909801373264e-05,
	"loss": 0.6544,
	"step": 175
	},
	{
	"epoch": 0.657354149548069,
	"grad_norm": 24.737939834594727,
	"learning_rate": 4.800429647908354e-05,
	"loss": 0.6358,
	"step": 200
	},
	{
	"epoch": 0.7395234182415776,
	"grad_norm": 22.52487564086914,
	"learning_rate": 4.6962806272773564e-05,
	"loss": 0.6558,
	"step": 225
	},
	{
	"epoch": 0.8216926869350862,
	"grad_norm": 18.9419002532959,
	"learning_rate": 4.5719986364624866e-05,
	"loss": 0.628,
	"step": 250
	},
	{
	"epoch": 0.903861955628595,
	"grad_norm": 21.046478271484375,
	"learning_rate": 4.428722949554857e-05,
	"loss": 0.6414,
	"step": 275
	},
	{
	"epoch": 0.9860312243221035,
	"grad_norm": 23.134496688842773,
	"learning_rate": 4.267766952966369e-05,
	"loss": 0.6489,
	"step": 300
	},
	{
	"epoch": 0.9991783073130649,
	"eval_accuracy": 0.6643378197883489,
	"eval_f1_macro": 0.48932255599208974,
	"eval_f1_micro": 0.6643378197883489,
	"eval_f1_weighted": 0.6590895238835833,
	"eval_loss": 0.8353803157806396,
	"eval_precision_macro": 0.5100466324017191,
	"eval_precision_micro": 0.6643378197883489,
	"eval_precision_weighted": 0.6607527574495663,
	"eval_recall_macro": 0.48717407338779406,
	"eval_recall_micro": 0.6643378197883489,
	"eval_recall_weighted": 0.6643378197883489,
	"eval_runtime": 61.8774,
	"eval_samples_per_second": 157.295,
	"eval_steps_per_second": 4.929,
	"step": 304
	}
	],
	"logging_steps": 25,
	"max_steps": 912,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 5,
	"early_stopping_threshold": 0.01
	},
	"attributes": {
	"early_stopping_patience_counter": 0
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 1.8146751147933696e+16,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}